12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
767786779678067816782678367846785678667876788678967906791679267936794679567966797679867996800680168026803680468056806680768086809681068116812681368146815681668176818681968206821682268236824682568266827682868296830683168326833683468356836683768386839684068416842684368446845684668476848684968506851685268536854685568566857685868596860686168626863686468656866686768686869687068716872687368746875687668776878687968806881688268836884688568866887688868896890689168926893689468956896689768986899690069016902690369046905690669076908690969106911691269136914691569166917691869196920692169226923692469256926692769286929693069316932693369346935693669376938693969406941694269436944694569466947694869496950695169526953695469556956695769586959696069616962696369646965696669676968696969706971697269736974697569766977697869796980698169826983698469856986698769886989699069916992699369946995699669976998699970007001700270037004 |
- This file describes changes in recent versions of Slurm. It primarily
- documents those changes that are of interest to users and administrators.
- * Changes in Slurm 16.05.2
- ==========================
- -- CRAY - Fix issue where the proctrack plugin could hang if the container
- id wasn't able to be made.
- -- Move test for job wait reason value of BurstBufferResources and
- BurstBufferStageIn later in the scheduling logic.
- -- Document which srun options apply to only job, only step, or job and step
- allocations.
- -- Use more compatible function to get thread name (>= 2.6.11).
- -- Fix order of job then step id when noting cleaning flag being set.
- -- Make it so the extern step sends a message with accounting information
- back to the slurmctld.
- -- Make it so the extern step calls the select_g_step_start|finish functions.
- -- Don't print error when extern step is canceled because job is ending.
- -- Handle a few error codes when dealing with the extern step to make sure
- we have the pids added to the system correctly.
- -- Add support for job dependencies with job array expressions. Previous logic
- required listing each task of job array individually.
- -- Make sure tres_cnt is set before creating a slurmdb_assoc_usage_t.
- -- Prevent backfill scheduler from starting a second "singleton" job if another
- one started during a backfill sleep.
- -- Fix for invalid array pointer when creating advanced reservation when job
- allocations span heterogeneous nodes (differing core or socket counts).
- -- Fix hostlist_ranged_string_xmalloc_dims to correctly not put brackets on
- hostlists when brackets == 0.
- -- Make sure we don't get brackets when making a range of reserved ports
- for a step.
- -- Change fatal to an error if port ranges aren't correct when reading state
- for steps.
- * Changes in Slurm 16.05.1
- ==========================
- -- Fix __cplusplus macro in spank.h to allow compilation with C++.
- -- Fix compile issue with older glibc < 2.12
- -- Fix for starting batch step with mpi/pmix plugin.
- -- Fix for "scontrol -dd show job" with respect to displaying the specific
- CPUs allocated to a job on each node. Prior logic would only display
- the CPU information for the first node in the job allocation.
- -- Print correct return code on failure to update active node features
- through sview.
- -- Allow QOS timelimit to override partition timelimit when EnforcePartLimits
- is set to all/any.
- -- Make it so qsub will do a "basename" on a wrapped command for the output
- and error files.
- -- Fix issue where slurmd could core when running the ipmi energy plugin.
- -- Documentation - clean up typos.
- -- Add logic so that slurmstepd can be launched under valgrind.
- -- Increase buffer size to read /proc/*/stat files.
- -- Fix for tracking job resource allocation when slurmctld is reconfigured
- while Cray Node Health Check (NHC) is running. Previous logic would fail to
- record the job's allocation then perform release operation upon NHC
- completion, resulting in underflow error messages.
- -- Make "scontrol show daemons" work with long node names.
- -- CRAY - Collect energy using a uint64_t instead of uint32_t.
- -- Fix incorrect if statements when determining if the user has a default
- account or wckey.
- -- Prevent job stuck in configuring state if slurmctld daemon restarted while
- PrologSlurmctld is running. Also re-issue burst_buffer/pre-load operation
- as needed.
- -- Correct task affinity support for FreeBSD.
- -- Fix for task affinity on KNL in SNC2/Flat mode.
- -- Recalculate a job's memory allocation after node reboot if job requests all
- of a node's memory and FastSchedule=0 is configured. Intel KNL memory size
- can change on reboot with various MCDRAM modes.
- -- Fix small memory leak when printing HealthCheckNodeState.
- -- Eliminate memory leaks when AuthInfo is configured.
- -- Improve sdiag output description in man page.
- -- Cray/capmc_resume script modifies a node's features (as needed) when the
- reinit (reboot) command is issued rather than waiting for the nodes to change
- to the "on" state.
- -- Correctly print ranges when using step values in job arrays.
- -- Allow for file names / paths over 256 characters when launching steps,
- as well as spaces in the executable name.
- -- job_submit.license.lua example modified to send message back to user.
- -- Document job --mem=0 option means all memory on a node.
- -- Set SLURM_JOB_QOS environment variable to QOS name instead of description.
- -- knl_cray.conf file option of CnselectPath added.
- -- node_features/knl_cray plugin modified to get current node NUMA and MCDRAM
- modes using cnselect command rather than capmc command.
- -- liblua - add SLES12 paths to runtime search list.
- -- Fix qsub default output and error files for task arrays.
- -- Fix qsub to set job_name correctly when wrapping a script (-b y)
- -- Cray - set EnforcePartLimits=any in slurm.conf template.
- * Changes in Slurm 16.05.0
- ==========================
- -- Update seff to fix warnings with ncpus, and list slurm-perlapi dependency
- in spec file.
- -- Fix testsuite to consistently use /usr/bin/env {bash,expect} construct.
- -- Cray - Ensure that step completion messages get to the database.
- -- Fix step cpus_per_task calculation for heterogeneous job allocation.
- -- Fix --with-json= configure option to use specified path.
- -- Add back thread_id to "thread_id" LogTimeFormat to distinguish between
- multiple threads with the same name. Now displays thread name and id.
- -- Change how Slurm determines the NUMA count of a node. Ignore KNL NUMA
- that only include memory.
- -- Cray - Fix node list parsing in capmc_suspend/resume programs.
- -- Fix sbatch #BSUB parsing for -W and -M options.
- -- Fix GRES task layout bug that could cause slurmctld to abort.
- -- Fix to --gres-flags=enforce-binding logic when multiple sockets needed.
- * Changes in Slurm 16.05.0rc2
- =============================
- -- Cray node shutdown/reboot scripts, perform operations on all nodes in one
- capmc command. Only if that fails, issue the operations in parallel on
- individual nodes. Required for scalability.
- -- Cleanup two minor Coverity warnings.
- -- Make it so the tres units in a job's formatted string are converted like
- they are in a step.
- -- Correct partition's MaxCPUsPerNode enforcement when nodes are shared by
- multiple partitions.
- -- node_features/knl_cray - Prevent slurmctld GRES errors for "hbm" references.
- -- Display thread name instead of thread id and remove process name in stderr
- logging for "thread_id" LogTimeFormat.
- -- Log IP address of bad incoming message to slurmctld.
- -- If a user requests tasks, nodes and ntasks-per-node and
- tasks-per-node/nodes != tasks print warning and ignore ntasks-per-node.
- -- Release CPU "owner" file locks.
- -- Fix for job step memory allocation: Reject invalid step at submit time
- rather than leaving it queued.
- -- Whenever possible, avoid allocating nodes that require a reboot.
- * Changes in Slurm 16.05.0rc1
- ==============================
- -- Remove the SchedulerParameters option of "assoc_limit_continue", making it
- the default value. Add option of "assoc_limit_stop". If "assoc_limit_stop"
- is set and a job cannot start due to association limits, then do not attempt
- to initiate any lower priority jobs in that partition. Setting this can
- decrease system throughput and utilization, but avoid potentially starving
- larger jobs by preventing them from launching indefinitely.
- -- Update a node's socket and cores per socket counts as needed after a node
- boot to reflect configuration changes which can occur on KNL processors.
- Note that the node's total core count must not change, only the distribution
- of cores across varying socket counts (KNL NUMA nodes treated as sockets by
- Slurm).
- -- Rename partition configuration from "Shared" to "OverSubscribe". Rename
- salloc, sbatch, srun option from "--shared" to "--oversubscribe". The old
- options will continue to function. Output field names also changed in
- scontrol, sinfo, squeue and sview.
- -- Add SLURM_UMASK environment variable to user job.
- -- knl_conf: Added new configuration parameter of CapmcPollFreq.
- -- squeue: remove errant spaces in column formats for "squeue -o %all".
- -- Add ARRAY_TASKS mail option to send emails to each task in a job array.
- -- Change default compression library for sbcast to lz4.
- -- select/cray - Initiate step node health check at start of step termination
- rather than after application completely ends so that NHC can capture
- information about hung (non-killable) processes.
- -- Add --units=[KMGTP] option to sacct to display values in specific unit type.
- -- Modify sacct and sacctmgr to display TRES values in converted units.
- -- Modify sacctmgr to accept TRES values with [KMGTP] suffixes.
- -- Replace hash function with more modern SipHash functions.
- -- Add "--with-cray_dir" build/configure option.
- -- BB- Only send stage_out email when stage_out is set in script.
- -- Add r/w locking to file_bcast receive functions in slurmd.
- -- Add TopologyParam option of "TopoOptional" to optimize network topology
- only for jobs requesting it.
- -- Fix build on FreeBSD.
- -- Configuration parameter "CpuFreqDef" used to set default governor for job
- step not specifying --cpu-freq (previously the parameter was unused).
- -- Fix sshare -o<format> to correctly display new lengths.
- -- Update documentation to rename Shared option to OverSubscribe.
- -- Update documentation to rename partition Priority option to PriorityTier.
- -- Prevent changing of QOS on running jobs.
- -- Update accounting when changing QOS on pending jobs.
- -- Add support to ntasks_per_socket in task/affinity.
- -- Generate init.d and systemd service scripts in etc/ through Make rather
- than at configure time to ensure all variable substitutions happen.
- -- Use TaskPluginParam for default task binding if no user specified CPU
- binding. User --cpu_bind option takes precedence over default. No longer
- any error if user --cpu_bind option does not match TaskPluginParam.
- -- Make sacct and sattach work with older slurmd versions.
- -- Fix protocol handling between 15.08 and 16.05 for 'scontrol show config'.
- -- Enable prefixes (e.g. info, debug, etc.) in slurmstepd debugging.
- * Changes in Slurm 16.05.0pre2
- ==============================
- -- Split partition's "Priority" field into "PriorityTier" (used to order
- partitions for scheduling and preemption) plus "PriorityJobFactor" (used by
- priority/multifactor plugin in calculating job priority, which is used to
- order jobs within a partition for scheduling).
- -- Revert call to getaddrinfo, restoring gethostbyaddr (introduced in Slurm
- 16.05.0pre1) which was failing on some systems.
- -- knl_cray.conf - Added AllowMCDRAM, AllowNUMA and AllowUserBoot
- configuration options.
- -- Add node_features_p_user_update() function to node_features plugin.
- -- Don't print Weight=1 lines in 'scontrol write config' (it's the default).
- -- Remove PARAMS macro from slurm.h.
- -- Remove BEGIN_C_DECLS and END_C_DECLS macros.
- -- Check that PowerSave mode configured for node_features/knl_cray plugin.
- It is required to reconfigure and reboot nodes.
- -- Update documentation to reflect new cgroup default location change from
- /cgroup to /sys/fs/cgroup.
- -- If NodeHealthCheckProgram configured HealthCheckInterval is non-zero, then
- modify slurmd to run it before registering with slurmctld.
- -- Fix for tasks being packed onto cores when the requested --cpus-per-task is
- greater than the number of threads on a core and --ntasks-per-core is 1.
- -- Make it so jobs/steps track ':' named gres/tres, before hand gres/gpu:tesla
- would only track gres/gpu, now it will track both gres/gpu and
- gres/gpu:tesla as separate gres if configured like
- AccountingStorageTRES=gres/gpu,gres/gpu:tesla
- -- Added new job dependency type of "aftercorr" which will start a task of a
- job array after the corresponding task of another job array completes.
- -- Increase default MaxTasksPerNode configuration parameter from 128 to 512.
- -- Enable sbcast data compression logic (compress option previously ignored).
- -- Add --compress option to srun command for use with --bcast option.
- -- Add TCPTimeout option to slurm[dbd].conf. Decouples MessageTimeout from TCP
- connections.
- -- Don't call primary controller for every RPC when backup is in control.
- -- Add --gres-flags=enforce-binding option to salloc, sbatch and srun commands.
- If set, the only CPUs available to the job will be those bound to the
- selected GRES (i.e. the CPUs identified in the gres.conf file will be
- strictly enforced rather than advisory).
- -- Change how a node's allocated CPU count is calculated to avoid double
- counting CPUs allocated to multiple jobs at the same time.
- -- Added SchedulerParameters option of "bf_min_prio_reserve". Jobs below
- the specified threshold will not have resources reserved for them.
- -- Added "sacctmgr show lostjobs" to report any orphaned jobs in the database.
- -- When a stepd is about to shut down and send its response to srun
- make the wait to return data only hit after 500 nodes and configurable
- based on the TCPTimeout value.
- -- Add functionality to reset the lft and rgt values of the association table
- with the slurmdbd.
- -- Add SchedulerParameter no_env_cache, if set no env cache will be used when
- launching a job, instead the job will fail and drain the node if the env
- isn't loaded normally.
- * Changes in Slurm 16.05.0pre1
- ==============================
- -- Add sbatch "--wait" option that waits for job completion before exiting.
- Exit code will match that of spawned job.
- -- Modify advanced reservation save/restore logic for core reservations to
- support configuration changes (changes in configured nodes or cores counts).
- -- Allow ControlMachine, BackupController, DbdHost and DbdBackupHost to be
- either short or long hostname.
- -- Job output and error files can now contain "%" character by specifying
- a file name with two consecutive "%" characters. For example,
- "sbatch -o slurm.%%.%j" for job ID 123 will generate an output file named
- "slurm.%.123".
- -- Pass user name in Prolog RPC from controller to slurmd when using
- PrologFlags=Alloc. Allows SLURM_JOB_USER env variable to be set when using
- Native Slurm on a Cray.
- -- Add "NumTasks" to job information visible to Slurm commands.
- -- Add mail wrapper script "smail" that will include job statistics in email
- notification messages.
- -- Remove vestigial "SICP" job option (inter-cluster job option). Completely
- different logic will be forthcoming.
- -- Fix case where the primary and backup dbds would both be performing rollup.
- -- Add an ack reply from slurmd to slurmstepd when job setup is done and the
- job is ready to be executed.
- -- Removed support for authd. authd has not been developed or supported for
- several years.
- -- Introduce a new parameter requeue_setup_env_fail in SchedulerParameters.
- A job that fails to setup the environment will be requeued and the node
- drained.
- -- Add ValidateTimeout and OtherTimeout to "scontrol show burst" output.
- -- Increase default sbcast buffer size from 512KB to 8MB.
- -- Enable the hdf5 profiling of the batch step.
- -- Eliminate redundant environment and script files for job arrays.
- -- Stop searching sbatch scripts for #PBS directives after 100 lines of
- non-comments. Stop parsing #PBS or #SLURM directives after 1024 characters
- into a line. Required for decent performance with huge scripts.
- -- Add debug flag for timing Cray portions of the code.
- -- Remove all *.la files from RPMs.
- -- Add Multi-Category Security (MCS) infrastructure to permit nodes to be bound
- to specific users or groups.
- -- Install the pmi2 unix sockets in slurmd spool directory instead of /tmp.
- -- Implement the getaddrinfo and getnameinfo instead of gethostbyaddr and
- gethostbyname.
- -- Finished PMIx implementation.
- -- Implemented the --without=package option for configure.
- -- Fix sshare to show each individual cluster with -M,--clusters option.
- -- Added --deadline option to salloc, sbatch and srun. Jobs which can not be
- completed by the user specified deadline will be terminated with a state of
- "Deadline" or "DL".
- -- Implemented and documented PMIX protocol which is used to bootstrap an
- MPI job. PMIX is an alternative to PMI and PMI2.
- -- Change default CgroupMountpoint (in cgroup.conf) from "/cgroup" to
- "/sys/fs/cgroup" to match current standard.
- -- Add #BSUB options to sbatch to read in from the batch script.
- -- HDF: Change group name of node from nodename to nodeid.
- -- The partition-specific SelectTypeParameters parameter can now be used to
- change the memory allocation tracking specification in the global
- SelectTypeParameters configuration parameter. Supported partition-specific
- values are CR_Core, CR_Core_Memory, CR_Socket and CR_Socket_Memory. If the
- global SelectTypeParameters value includes memory allocation management and
- the partition-specific value does not, then memory allocation management for
- that partition will NOT be supported (i.e. memory can be over-allocated).
- Likewise the global SelectTypeParameters might not include memory management
- while the partition-specific value does.
- -- Burst buffer/cray - Add support for multiple buffer pools including support
- for different resource granularity by pool.
- -- Burst buffer advanced reservation units treated as bytes (per documentation)
- rather than GB.
- -- Add an "scontrol top <jobid>" command to re-order the priorities of a user's
- pending jobs. May be disabled with the "disable_user_top" option in the
- SchedulerParameters configuration parameter.
- -- Modify sview to display negative job nice values.
- -- Increase job's nice value field from 16 to 32 bits.
- -- Remove deprecated job_submit/cnode plugin.
- -- Enhance slurm.conf option EnforcePartLimits to include options like "ANY" and
- "ALL". "Any" is equivalent to "Yes" and "All" will check all partitions
- a job is submitted to and if any partition limit is violated the job will
- be rejected even if it could possibly run on another partition.
- -- Add "features_act" field (currently active features) to the node
- information. Output of scontrol, sinfo, and sview changed accordingly.
- The field previously displayed as "Features" is now "AvailableFeatures"
- while the new field is displayed as "ActiveFeatures".
- -- Remove Sun Constellation, IBM Federation Switches (replaced by NRT switch
- plugin) and long-defunct Quadrics Elan support.
- -- Add -M<clusters> option to sreport.
- -- Rework group caching to work better in environments with
- enumeration disabled. Removed CacheGroups config directive, group
- membership lists are now always cached, controlled by
- GroupUpdateTime parameter. GroupUpdateForce parameter default
- value changed to 1.
- -- Add reservation flag of "purge_comp" which will purge an advanced
- reservation once it has no more active (pending, suspended or running) jobs.
- -- Add new configuration parameter "KNLPlugins" and plugin infrastructure.
- -- Add optional job "features" to node reboot RPC.
- -- Add slurmd "-b" option to report node rebooted at daemon start time. Used
- for testing purposes.
- -- contribs/cray: Add framework for powering nodes up and down.
- -- For job constraint, convert comma separator to "&".
- -- Add Max*PerAccount options for QOS.
- -- Protect slurm_mutex_* calls with abort() on failure.
- * Changes in Slurm 15.08.13
- ===========================
- -- Fix issue where slurmd could core when running the ipmi energy plugin.
- -- Print correct return code on failure to update node features through sview.
- -- Documentation - cleanup typos.
- -- Add logic so that slurmstepd can be launched under valgrind.
- -- Increase buffer size to read /proc/*/stat files.
- -- MYSQL - Handle ER_HOST_IS_BLOCKED better by failing when it occurs instead
- of continuously printing the message over and over as the problem will
- most likely not resolve itself.
- -- Add --disable-bluegene to configure. This will make it so Slurm
- can work on a BGAS node.
- -- Prevent job stuck in configuring state if slurmctld daemon restarted while
- PrologSlurmctld is running.
- -- Handle association correctly if using FAIR_TREE as well as shares=Parent
- -- Fix race condition when setting priority of a job and the association
- doesn't have a parent.
- -- MYSQL - Fix issue with adding a reservation if the name has single quotes in
- it.
- -- Correctly print ranges when using step values in job arrays.
- -- Fix for invalid array pointer when creating advanced reservation when job
- allocations span heterogeneous nodes (differing core or socket counts).
- * Changes in Slurm 15.08.12
- ===========================
- -- Do not attempt to power down a node which has never responded if the
- slurmctld daemon restarts without state.
- -- Fix for possible slurmstepd segfault on invalid user ID.
- -- MySQL - Fix for possible race condition when archiving multiple clusters
- at the same time.
- -- Fix compile for when you don't have hwloc.
- -- Fix issue where daemons would only listen on specific address given in
- slurm.conf instead of all. If looking for specific addresses use
- TopologyParam options No*InAddrAny.
- -- Cray - Better robustness when dealing with the aeld interface.
- -- job_submit.lua - add array_inx value for job arrays.
- -- Perlapi - Remove unneeded/undefined mutex.
- -- Fix issue when TopologyParam=NoInAddrAny is set the responses wouldn't
- make it to the slurmctld when using message aggregation.
- -- MySQL - Fix potential memory leak when rolling up data.
- -- Fix issue with clustername file when running on NFS with root_squash.
- -- Fix race condition with respect to cleaning up the profiling threads
- when in use.
- -- Fix issues when building on NetBSD.
- -- Fix jobcomp/elasticsearch build when libcurl is installed in a
- non-standard location.
- -- Fix MemSpecLimit to explicitly require TaskPlugin=task/cgroup and
- ConstrainRAMSpace set in cgroup.conf.
- -- MYSQL - Fix order of operations issue where if the database is locked up
- and the slurmctld doesn't wait long enough for the response it would give
- up leaving the connection open and create a situation where the next message
- sent could receive the response of the first one.
- -- Fix CFULL_BLOCK distribution type.
- -- Prevent sbatch from trying to enable debug messages when using job arrays.
- -- Prevent sbcast from enabling "--preserve" when specifying a jobid.
- -- Prevent wrong error message from spank plugin stack on GLOB_NOSPACE error.
- -- Fix proctrack/lua plugin to prevent possible deadlock.
- -- Prevent infinite loop in slurmstepd if execve fails.
- -- Prevent multiple responses to REQUEST_UPDATE_JOB_STEP message.
- -- Prevent possible deadlock in acct_gather_filesystem/lustre on error.
- -- Make it so --mail-type=NONE doesn't throw an invalid error.
- -- If no default account is given for a user when creating (only a list of
- accounts) no default account is printed, previously NULL was printed.
- -- Fix for tracking a node's allocated CPUs with gang scheduling.
- -- Fix Hidden error during _rpc_forward_data call.
- -- Fix bug resulting from wrong order-of-operations in _connect_srun_cr(),
- and two others that cause incorrect debug messages.
- -- Fix backwards compatibility with sreport going to <= 14.11 coming from
- >= 15.08 for some reports.
- * Changes in Slurm 15.08.11
- ===========================
- -- Fix for job "--contiguous" option that could cause job allocation/launch
- failure or slurmctld crash.
- -- Fix to setup logs for single-character program names correctly.
- -- Backfill scheduling performance enhancement with large number of running
- jobs.
- -- Reset job's prolog_running counter on slurmctld restart or reconfigure.
- -- burst_buffer/cray - Update job's prolog_running counter if pre_run fails.
- -- MYSQL - Make the error message more specific when removing a reservation
- and it doesn't meet basic requirements.
- -- burst_buffer/cray - Fix for script creating or deleting persistent buffer
- would fail "paths" operation and hold the job.
- -- power/cray - Prevent possible divide by zero.
- -- power/cray - Fix bug introduced in 15.08.10 preventing operation in many
- cases.
- -- Prevent deadlock for flow of data to the slurmdbd when sending reservation
- that wasn't set up correctly.
- -- burst_buffer/cray - Don't call Datawarp "paths" function if script includes
- only create or destroy of persistent burst buffer. Some versions of Datawarp
- software return an error for such scripts, causing the job to be held.
- -- Fix potential issue when adding and removing TRES which could result
- in the slurmdbd segfaulting.
- -- Add cast to memory limit calculation to prevent integer overflow for
- very large memory values.
- -- Bluegene - Fix issue with reservations resizing under the covers on a
- restart of the slurmctld.
- -- Avoid error message of "Requested cpu_bind option requires entire node to
- be allocated; disabling affinity" being generated in some cases where
- task/affinity and task/cgroup plugins used together.
- -- Fix version issue when packing GRES information between 2 different versions
- of Slurm.
- -- Fix for srun hanging with OpenMPI and PMIx
- -- Better initialization of node_ptr when dealing with protocol_version.
- -- Fix incorrect type when initializing header of a message.
- -- MYSQL - Fix incorrect usage of limit and union.
- -- MYSQL - Remove 'ignore' from alter ignore when updating a table.
- -- Documentation - update prolog_epilog page to reflect current behavior
- if the Prolog fails.
- -- Documentation - clarify behavior of 'srun --export=NONE' in man page.
- -- Fix potential gres underflow on restart of slurmctld.
- -- Fix sacctmgr to remove a user who has no associations.
- * Changes in Slurm 15.08.10
- ===========================
- -- Fix issue where if a slurmdbd rollup lasted longer than 1 hour the
- rollup would effectively never run again.
- -- Make error message in the pmi2 code to debug as the issue can be expected
- and retries are done making the error message a little misleading.
- -- Power/cray: Don't specify NID list to Cray APIs. If any of those nodes are
- not in a ready state, the API returned an error for ALL nodes rather than
- valid data for nodes in ready state.
- -- Fix potential divide by zero when tree_width=1.
- -- checkpoint/blcr plugin: Fix memory leak.
- -- If using PrologFlags=contain: Don't launch the extern step if a job is
- cancelled while launching.
- -- Remove duplicates from AccountingStorageTRES
- -- Fix backfill scheduler race condition that could cause invalid pointer in
- select/cons_res plugin. Bug introduced in 15.08.9.
- -- Avoid double calculation on partition QOS if the job is using the same QOS.
- -- Do not change a job's time limit when updating unrelated field in a job.
- -- Fix situation on a heterogeneous memory cluster where the order of
- constraints mattered in a job.
- * Changes in Slurm 15.08.9
- ==========================
- -- BurstBuffer/cray - Defer job cancellation or time limit while "pre-run"
- operation in progress to avoid inconsistent state due to multiple calls
- to job termination functions.
- -- Fix issue with resizing jobs and limits not being kept track of correctly.
- -- BGQ - Remove redeclaration of job_read_lock.
- -- BGQ - Tighter locks around structures when nodes/cables change state.
- -- Make it possible to change CPUsPerTask with scontrol.
- -- Make it so scontrol update part qos= will take away a partition QOS from
- a partition.
- -- Fix issue where SocketsPerBoard didn't translate to Sockets when CPUS=
- was also given.
- -- Add note to slurm.conf man page about setting "--cpu_bind=no" as part
- of SallocDefaultCommand if a TaskPlugin is in use.
- -- Set correct reason when a QOS' MaxTresMins is violated.
- -- Ensure that a job is completely launched before trying to suspend it.
- -- Remove historical presentations and design notes. Only distribute
- maintained doc/html and doc/man directories.
- -- Remove duplicate xmalloc() in task/cgroup plugin.
- -- Backfill scheduler to validate correct job partition for job submitted to
- multiple partitions.
- -- Force close on exec on first 256 file descriptors when launching a
- slurmstepd to close potential open ones.
- -- Step GRES value changed from type "int" to "int64_t" to support larger
- values.
- -- Fix getting reservations to database when database is down.
- -- Fix issue with sbcast not doing a correct fanout.
- -- Fix issue where steps weren't always getting the gres/tres involved.
- -- Fixed double read lock on getting job's gres/tres.
- -- Fix display for RoutePlugin parameter to display the correct value.
- -- Fix route/topology plugin to prevent segfault in sbcast when in use.
- -- Fix Cray slurmconfgen_smw.py script to use nid as nid, not nic.
- -- Fix Cray NHC spawning on job requeue. Previous logic would leave nodes
- allocated to a requeued job as non-usable on job termination.
- -- burst_buffer/cray plugin: Prevent a requeued job from being restarted while
- file stage-out is still in progress. Previous logic could restart the job
- and not perform a new stage-in.
- -- Fix job array formatting to allow return [0-100:2] display for arrays with
- step functions rather than [0,2,4,6,8,...] .
- -- FreeBSD - replace Linux-specific set_oom_adj to avoid errors in slurmd log.
- -- Add option for TopologyParam=NoInAddrAnyCtld to make the slurmctld listen
- on only one port like TopologyParam=NoInAddrAny does for everything else.
- -- Fix burst buffer plugin to prevent corruption of the CPU TRES data when bb
- is not set as an AccountingStorageTRES type.
- -- Suppress error messages in acct_gather_energy/ipmi plugin after repeated
- failures.
- -- Change burst buffer use completion email message from
- "SLURM Job_id=1360353 Name=tmp Staged Out, StageOut time 00:01:47" to
- "SLURM Job_id=1360353 Name=tmp StageOut/Teardown time 00:01:47"
- -- Generate burst buffer use completion email immediately after teardown
- completes rather than at job purge time (likely minutes later).
- -- Fix issue when adding a new TRES to AccountingStorageTRES for the first
- time.
- -- Update gang scheduling tables when job manually suspended or resumed. Prior
- logic could mess up job suspend/resume sequencing.
- -- Update gang scheduling data structures when job changes in size.
- -- Associations - prevent hash table corruption if uid initially unset for
- a user, which can cause slurmctld to crash if that user is deleted.
- -- Avoid possibly aborting srun on SIGSTOP while creating the job step due to
- threading bug.
- -- Fix deadlock issue with burst_buffer/cray when a newly created burst
- buffer is found.
- -- burst_buffer/cray: Set environment variables just before starting job rather
- than at job submission time to reflect persistent buffers created or
- modified while the job is pending.
- -- Fix check of per-user qos limits on the initial run by a user.
- -- Fix gang scheduling resource selection bug which could prevent multiple jobs
- from being allocated the same resources. Bug was introduced in 15.08.6.
- -- Don't print the Rgt value of an association from the cache as it isn't
- kept up to date.
- -- burst_buffer/cray - If the pre-run operation fails then don't issue
- duplicate job cancel/requeue unless the job is still in run state. Prevents
- jobs hung in COMPLETING state.
- -- task/cgroup - Fix bug in task binding to CPUs.
- * Changes in Slurm 15.08.8
- ==========================
- -- Backfill scheduling properly synchronized with Cray Node Health Check.
- Prior logic could result in highest priority job getting improperly
- postponed.
- -- Make it so daemons also support TopologyParam=NoInAddrAny.
- -- If scancel is operating on large number of jobs and RPC responses from
- slurmctld daemon are slow then introduce a delay in sending the cancel job
- requests from scancel in order to reduce load on slurmctld.
- -- Remove redundant logic when updating a job's task count.
- -- MySQL - Fix querying jobs with reservations when the id's have rolled.
- -- Perl - Fix use of uninitialized variable in slurm_job_step_get_pids.
- -- Launch batch job requesting --reboot after the boot completes.
- -- Move debug messages like "not the right user" from association manager
- to debug3 when trying to find the correct association.
- -- Fix incorrect logic when querying assoc_mgr information.
- -- Move debug messages to debug3 notifying a gres_bit_alloc was NULL for
- gres types without a file.
- -- Sanity Check Patch to setup variables for RAPL if in a race for it.
- -- GRES - Fix minor typecast issues.
- -- burst_buffer/cray - Increase size of intermediate variable used to store
- buffer byte size read from DW instance from 32 to 64-bits to avoid overflow
- and reporting invalid buffer sizes.
- -- Allow an existing reservation with running jobs to be modified without
- Flags=IGNORE_JOBS.
- -- srun - don't attempt to execve() a directory with a name matching the
- requested command
- -- Do not automatically relocate an advanced reservation for individual cores
- that spans multiple nodes when nodes in that reservation go down (e.g.
- a 1 core reservation on node "tux1" will be moved if node "tux1" goes
- down, but a reservation containing 2 cores on node "tux1" and 3 cores on
- "tux2" will not be moved if node "tux1" goes down). Advanced reservations for
- whole nodes will be moved by default for down nodes.
- -- Avoid possible double free of memory (and likely abort) for slurmctld in
- background mode.
- -- contribs/cray/csm/slurmconfgen_smw.py - avoid including repurposed compute
- nodes in configs.
- -- Support AuthInfo in slurmdbd.conf that is different from the value in
- slurm.conf.
- -- Fix build on FreeBSD 10.
- -- Fix hdf5 build on ppc64 by using correct fprintf formatting for types.
- -- Fix cosmetic printing of NO_VALs in scontrol show assoc_mgr.
- -- Fix perl api for newer perl versions.
- -- Fix for jobs requesting cpus-per-task (eg. -c3) that exceed the number of
- cpus on a core.
- -- Remove unneeded perl files from the .spec file.
- -- Flesh out filters for scontrol show assoc_mgr.
- -- Add function to remove assoc_mgr_info_request_t members without freeing
- structure.
- -- Fix build on some non-glibc systems by updating includes.
- -- Add new PowerParameters options of get_timeout and set_timeout. The default
- set_timeout was increased from 5 seconds to 30 seconds. Also re-read current
- power caps periodically or after any failed "set" operation.
- -- Fix slurmdbd segfault when listing users with blank user condition.
- -- Save the ClusterName to a file in SaveStateLocation, and use that to
- verify the state directory belongs to the given cluster at startup to avoid
- corruption from multiple clusters attempting to share a state directory.
- -- MYSQL - Fix issue when rerolling monthly data to work off correct time
- period. This would only hit you if you rerolled a 15.08 prior to this
- commit.
- -- If FastSchedule=0 is used make sure TRES are set up correctly in accounting.
- -- Fix sreport's truncation of columns with large TRES and not using
- a parsing option.
- -- Make sure count of boards are restored when slurmctld has option -R.
- -- When determining if a job can fit into a TRES time limit after resources
- have been selected set the time limit appropriately if the job didn't
- request one.
- -- Fix inadequate locks when updating a partition's TRES.
- -- Add new assoc_limit_continue flag to SchedulerParameters.
- -- Avoid race in acct_gather_energy_cray if energy requested before available.
- -- MYSQL - Avoid having multiple default accounts when a user is added to
- a new account and making it a default all at once.
- * Changes in Slurm 15.08.7
- ==========================
- -- sched/backfill: If a job can not be started within the configured
- backfill_window, set its start time to 0 (unknown) rather than the end
- of the backfill_window.
- -- Remove the 1024-character limit on lines in batch scripts.
- -- burst_buffer/cray: Round up swap size by configured granularity.
- -- select/cray: Log repeated aeld reconnects.
- -- task/affinity: Disable core-level task binding if more CPUs required than
- available cores.
- -- Preemption/gang scheduling: If a job is suspended at slurmctld restart or
- reconfiguration time, then leave it suspended rather than resume+suspend.
- -- Don't use lower weight nodes for job allocation when topology/tree used.
- -- BGQ - If a cable goes into error state remove the underlying block on
- a dynamic system and mark the block in error on a static/overlap system.
- -- BGQ - Fix regression in 9cc4ae8add7f where blocks would be deleted on
- static/overlap systems when some hardware issue happens when restarting
- the slurmctld.
- -- Log if CLOUD node configured without a resume/suspend program or suspend
- time.
- -- MYSQL - Better locking around g_qos_count which was previously unprotected.
- -- Correct size of buffer used for jobid2str to avoid truncation.
- -- Fix allocation/distribution of tasks across multiple nodes when
- --hint=nomultithread is requested.
- -- If a reservation's nodes value is "all" then track the current nodes in the
- system, even if those nodes change.
- -- Fix formatting if using "tree" option with sreport.
- -- Make it so sreport prints out a line for non-existent TRES instead of
- error message.
- -- Set job's reason to "Priority" when higher priority job in that partition
- (or reservation) can not start rather than leaving the reason set to
- "Resources".
- -- Fix memory corruption when a new non-generic TRES is added to the
- DBD for the first time. The corruption is only noticed at shutdown.
- -- burst_buffer/cray - Improve tracking of allocated resources to handle race
- condition when reading state while buffer allocation is in progress.
- -- If a job is submitted only with -c option and numcpus is updated before
- the job starts update the cpus_per_task appropriately.
- -- Update salloc/sbatch/srun documentation to mention time granularity.
- -- Fixed memory leak when freeing assoc_mgr_info_msg_t.
- -- Prevent possible use of empty reservation core bitmap, causing abort.
- -- Remove unneeded pack32's from qos_rec when qos_rec is NULL.
- -- Make sacctmgr print MaxJobsPerUser when adding/altering a QOS.
- -- Correct dependency formatting to print array task ids if set.
- -- Update sacctmgr help with current QOS options.
- -- Update slurmstepd to initialize authentication before task launch.
- -- burst_buffer/cray: Eliminate need for dedicated nodes.
- -- If no MsgAggregationParams is set don't set the internal string to
- anything. The slurmd will process things correctly after the fact.
- -- Fix output from api when printing job step not found.
- -- Don't allow user specified reservation names to disrupt the normal
- reservation sequence numbering scheme.
- -- Fix scontrol to be able to accept TRES as an option when creating
- a reservation.
- -- contrib/torque/qstat.pl - return exit code of zero even with no records
- printed for 'qstat -u'.
- -- When a reservation is created or updated, compress user provided node names
- using hostlist functions (e.g. translate user input of "Nodes=tux1,tux2"
- into "Nodes=tux[1-2]").
- -- Change output routines for scontrol show partition/reservation to handle
- unexpectedly large strings.
- -- Add more partition fields to "scontrol write config" output file.
- -- Backfill scheduling fix: If a job can't be started due to a "group" resource
- limit, rather than reserve resources for it when the next job ends, don't
- reserve any resources for it.
- -- Avoid slurmstepd abort if malloc fails during accounting gather operation.
- -- Fix nodes from being overallocated when allocation straddles multiple nodes.
- -- Fix memory leak in slurmctld job array logic.
- -- Prevent decrementing of TRESRunMins when AccountingStorageEnforce=limits is
- not set.
- -- Fix backfill scheduling bug which could postpone the scheduling of jobs due
- to avoidance of nodes in COMPLETING state.
- -- Properly account for memory, CPUs and GRES when slurmctld is reconfigured
- while there is a suspended job. Previous logic would add the CPUs, but not
- memory or GPUs. This would result in underflow/overflow errors in select
- cons_res plugin.
- -- Strip flags from a job state in qstat wrapper before evaluating.
- -- Add missing job states from the qstat wrapper.
- -- Cleanup output routines to reduce number of fixed-sized buffer function
- calls and allow for unexpectedly large strings.
- * Changes in Slurm 15.08.6
- ==========================
- -- In slurmctld log file, log duplicate job ID found by slurmd. Previously was
- being logged as prolog/epilog failure.
- -- If a job is requeued while in the process of being launched, remove its
- job ID from slurmd's record of active jobs in order to avoid generating a
- duplicate job ID error when launched for the second time (which would
- drain the node).
- -- Cleanup messages when handling job script and environment variables in
- older directory structure formats.
- -- Prevent triggering gang scheduling within a partition if configured with
- PreemptType=partition_prio and PreemptMode=suspend,gang.
- -- Decrease parallelism in job cancel request to prevent denial of service
- when cancelling huge numbers of jobs.
- -- If all ephemeral ports are in use, try using other port numbers.
- -- Revert way lib lua is handled when doing a dlopen, fixing a regression in
- 15.08.5.
- -- Set the debug level of the rmdir message in xcgroup_delete() to debug2.
- -- Fix the qstat wrapper when user is removed from the system but still
- has running jobs.
- -- Log the request to terminate a job at info level if DebugFlags includes
- the Steps keyword.
- -- Fix potential memory corruption in _slurm_rpc_epilog_complete as well as
- _slurm_rpc_complete_job_allocation.
- -- Fix cosmetic display of AccountingStorageEnforce option "nosteps" when
- in use.
- -- If a job can never be started due to unsatisfied job dependencies, report
- the full original job dependency specification rather than the dependencies
- remaining to be satisfied (typically NULL).
- -- Refactor logic to synchronize active batch jobs and their script/environment
- files, reducing overhead dramatically for large numbers of active jobs.
- -- Avoid hard-link/copy of script/environment files for job arrays. Use the
- master job record file for all tasks of the job array.
- NOTE: Job arrays submitted to Slurm version 15.08.6 or later will fail if
- the slurmctld daemon is downgraded to an earlier version of Slurm.
- -- Move slurmctld mail handler to separate thread for improved performance.
- -- Fix containment of adopted processes from pam_slurm_adopt.
- -- If a pending job array has multiple reasons for being in a pending state,
- then print all reasons in a comma separated list.
- * Changes in Slurm 15.08.5
- ==========================
- -- Prevent "scontrol update job" from updating jobs that have already finished.
- -- Show requested TRES in "squeue -O tres" when job is pending.
- -- Backfill scheduler: Test association and QOS node limits before reserving
- resources for pending job.
- -- burst_buffer/cray: If teardown operations fails, sleep and retry.
- -- Clean up the external pids when using the PrologFlags=Contain feature
- and the job finishes.
- -- burst_buffer/cray: Support file staging when job lacks job-specific buffer
- (i.e. only persistent burst buffers).
- -- Added srun option of --bcast to copy executable file to compute nodes.
- -- Fix for advanced reservation of burst buffer space.
- -- BurstBuffer/cray: Add logic to terminate dw_wlm_cli child processes at
- shutdown.
- -- If job can't be launched or requeued, then terminate it.
- -- BurstBuffer/cray: Enable clearing of burst buffer string on completed job
- as a means of recovering from a failure mode.
- -- Fix wrong memory free when parsing SrunPortRange=0-0 configuration.
- -- BurstBuffer/cray: Fix job record purging if cancelled from pending state.
- -- BGQ - Handle database throw correctly when syncing users on blocks.
- -- MySQL - Make sure we don't have a NULL string returned when not
- requesting any specific association.
- -- sched/backfill: If max_rpc_cnt is configured and the backlog of RPCs has
- not cleared after yielding locks, then continue to sleep.
- -- Preserve the job dependency description displayed in 'scontrol show job'
- even if the dependee job was terminated and cleaned causing the
- dependent to never run because of DependencyNeverSatisfied.
- -- Correct job task count calculation if only node count and ntasks-per-node
- options supplied.
- -- Make sure the association manager converts any string to be lower case
- as all the associations from the database will be lower case.
- -- Sanity check for xcgroup_delete() to verify incoming parameter is valid.
- -- Fix formatting for sacct with variables that switched from uint32_t to
- uint64_t.
- -- Fix a typo in sacct man page.
- -- Set up extern step to track any children of an ssh if it leaves anything
- else behind.
- -- Prevent slurmdbd divide by zero if no associations defined at rollup time.
- -- Multifactor - Add sanity check to make sure pending jobs are handled
- correctly when PriorityFlags=CALCULATE_RUNNING is set.
- -- Add slurmdb_find_tres_count_in_string() to slurm db perl api.
- -- Make lua dlopen() conditional on version found at build.
- -- sched/backfill - Delay backfill scheduler for completing jobs only if
- CompleteWait configuration parameter is set (make code match documentation).
- -- Release a job's allocated licenses only after epilog runs on all nodes
- rather than at start of termination process.
- -- Cray job NHC delayed until after burst buffer released and epilog completes
- on all allocated nodes.
- -- Fix abort of srun if using PrologFlags=NoHold
- -- Let devices step_extern cgroup inherit attributes of job cgroup.
- -- Add new hook to Task plugin to be able to put adopted processes in the
- step_extern cgroups.
- -- Fix AllowUsers documentation in burst_buffer.conf man page. Usernames are
- comma separated, not colon delimited.
- -- Fix issue with time limit not being set correctly from a QOS when a job
- requests no time limit.
- -- Various CLANG fixes.
- -- In both sched/basic and backfill: If a job can not be started due to some
- account/qos limit, then don't start other jobs which could delay jobs. The
- old logic would skip the job and start other jobs, which could delay the
- higher priority job.
- -- select/cray: Prevent NHC from running more than once per job or step.
- -- Fix fields not properly printed when adding an account through sacctmgr.
- -- Update LBNL Node Health Check (NHC) link on FAQ.
- -- Fix multifactor plugin to prevent slurmctld from getting segmentation fault
- should the tres_alloc_cnt be NULL.
- -- sbatch/salloc - Move nodelist logic before the time min_nodes is used
- so we can set it correctly before tasks are set.
- * Changes in Slurm 15.08.4
- ==========================
- -- Fix typo for the "devices" cgroup subsystem in pam_slurm_adopt.c
- -- Fix TRES_MAX flag to work correctly.
- -- Improve the systemd startup files.
- -- Added burst_buffer.conf flag parameter of "TeardownFailure" which will
- teardown and remove a burst buffer after failed stage-in or stage-out.
- By default, the buffer will be preserved for analysis and manual teardown.
- -- Prevent a core dump in srun if the signal handler runs during the job
- allocation causing the step context to be NULL.
- -- Don't fail job if multiple prolog operations in progress at slurmctld
- restart time.
- -- Burst_buffer/cray: Fix to purge terminated jobs with burst buffer errors.
- -- Burst_buffer/cray: Don't stall scheduling of other jobs while a stage-in
- is in progress.
- -- Make it possible to query 'extern' step with sstat.
- -- Make 'extern' step show up in the database.
- -- MYSQL - Quote assoc table name in mysql query.
- -- Make SLURM_ARRAY_TASK_MIN, SLURM_ARRAY_TASK_MAX, and SLURM_ARRAY_TASK_STEP
- environment variables available to PrologSlurmctld and EpilogSlurmctld.
- -- Fix slurmctld bug in which a pending job array could be canceled
- by a user different from the owner or the administrator.
- -- Support taking node out of FUTURE state with "scontrol reconfig" command.
- -- Sched/backfill: Fix to properly enforce SchedulerParameters of
- bf_max_job_array_resv.
- -- Enable operator to reset sdiag data.
- -- jobcomp/elasticsearch plugin: Add array_job_id and array_task_id fields.
- -- Remove duplicate #define IS_NODE_POWER_UP.
- -- Added SchedulerParameters option of max_script_size.
- -- Add REQUEST_ADD_EXTERN_PID option to add pid to the slurmstepd's extern
- step.
- -- Add unique identifiers to anchor tags in HTML generated from the man pages.
- -- Add with_freeipmi option to spec file.
- -- Minor elasticsearch code improvements
- * Changes in Slurm 15.08.3
- ==========================
- -- Correct Slurm's RPM build if Munge is not installed.
- -- Job array termination status email ExitCode based upon highest exit code
- from any task in the job array rather than the last task. Also change the
- state from "Ended" or "Failed" to "Mixed" where appropriate.
- -- Squeue recombines pending job array records only if their name and partition
- are identical.
- -- Fix some minor leaks in the job info and step info API.
- -- Export missing QOS id when filling in association with the association
- manager.
- -- Fix invalid reference if a lua job_submit plugin references a default qos
- when a user doesn't exist in the database.
- -- Use association enforcement in the lua plugin.
- -- Fix a few spots missing defines of accounting_enforce or acct_db_conn
- in the plugins.
- -- Show requested TRES in scontrol show jobs when job is pending.
- -- Improve sched/backfill support for job features, especially XOR construct.
- -- Correct scheduling logic for job features option with XOR construct that
- could delay a job's initiation.
- -- Remove unneeded frees when creating a tres string.
- -- Send a tres_alloc_str for the batch step
- -- Fix incorrect check for slurmdb_find_tres_count_in_string in various places,
- it needed to check for INFINITE64 instead of zero.
- -- Don't allow scontrol to create partitions with the name "DEFAULT".
- -- burst_buffer/cray: Change error from "invalid request" to "permission denied"
- if a non-authorized user tries to create/destroy a persistent buffer.
- -- PrologFlags work: Setting a flag of "Contain" implicitly sets the "Alloc"
- flag. Fix code path which could prevent execution of the Prolog when the
- "Alloc" or "Contain" flag were set.
- -- Fix for acct_gather_energy/cray|ibmaem to work with missed enum.
- -- MYSQL - When inserting a job and begin_time is 0 do not set it to
- submit_time. 0 means the job isn't eligible yet so we need to treat it so.
- -- MYSQL - Don't display ineligible jobs when querying for a window of time.
- -- Fix creation of advanced reservation of cores on nodes which are DOWN.
- -- Return permission denied if regular user tries to release job held by an
- administrator.
- -- MYSQL - Fix rollups for multiple jobs running by the same association
- in an hour counting multiple times.
- -- Burstbuffer/Cray plugin - Fix for persistent burst buffer use.
- Don't call paths if no #DW options.
- -- Modifications to pam_slurm_adopt to work correctly for the "extern" step.
- -- Alphabetize debugflags when printing them out.
- -- Fix systemd's slurmd service from killing slurmstepds on shutdown.
- -- Fixed counter of not indexed jobs, error_cnt post-increment changed to
- pre-increment.
- * Changes in Slurm 15.08.2
- ==========================
- -- Fix for tracking node state when jobs that have been allocated exclusive
- access to nodes (i.e. entire nodes) and later relinquish some nodes. Nodes
- would previously appear partly allocated and prevent use by other jobs.
- -- Correct some cgroup paths ("step_batch" vs. "step_4294967294", "step_exter"
- vs. "step_extern", and "step_extern" vs. "step_4294967295").
- -- Fix advanced reservation core selection logic with network topology.
- -- MYSQL - Remove restriction to have to be at least an operator to query TRES
- values.
- -- For pending jobs have sacct print 0 for nnodes instead of the bogus 2.
- -- Fix for tracking node state when jobs that have been allocated exclusive
- access to nodes (i.e. entire nodes) and later relinquish some nodes. Nodes
- would previously appear partly allocated and prevent use by other jobs.
- -- Fix updating job in db after extending job's timelimit past partition's
- timelimit.
- -- Fix srun -I<timeout> from flooding the controller with step create requests.
- -- Requeue/hold batch job launch request if job already running (possible if
- node went to DOWN state, but jobs remained active).
- -- If a job's CPUs/task ratio is increased due to configured MaxMemPerCPU,
- then increase its allocated CPU count in order to enforce CPU limits.
- -- Don't mark powered down node as not responding. This could be triggered by
- race condition of the node suspend and ping logic, preventing use of the
- node.
- -- Don't requeue RPC going out from slurmctld to DOWN nodes (can generate
- repeating communication errors).
- -- Propagate sbatch "--dist=plane=#" option to srun.
- -- Add acct_gather_energy/ibmaem plugin for systems with IBM Systems Director
- Active Energy Manager.
- -- Fix spec file to look for mariadb or mysql devel packages for build
- requirements.
- -- MySQL - Improve the code with asking for jobs in a suspended state.
- -- Fix slurmctld allowing root to see job steps using squeue -s.
- -- Do not send burst buffer stage out email unless the job uses burst buffers.
- -- Fix sacct to not return all jobs if the -j option is given with a trailing
- ','.
- -- Permit job_submit plugin to set a job's priority.
- -- Fix occasional srun segfault.
- -- Fix issue with sacct, printing 0_0 for arrays that had finished in the
- database but the start record hadn't made it yet.
- -- sacctmgr - Don't allow default account associations to be removed
- from a user.
- -- Fix sacct -j, (nothing but a comma) to not return all jobs.
- -- Fixed slurmctld not sending cold-start messages correctly to the database
- when a cold-start (-c) happens to the slurmctld.
- -- Fix case where if the backup slurmdbd has existing connections when it gives
- up control that it would be killed.
- -- Fix task/cgroup affinity to work correctly with multi-socket
- single-threaded cores. A regression caused only 1 socket to be used on
- this kind of node instead of all that were available.
- -- MYSQL - Fix minor issue after an index was added to the database it would
- previously take 2 restarts of the slurmdbd to make it stick correctly.
- -- Add hv_to_qos_cond() and qos_rec_to_hv() functions to the Perl interface.
- -- Add new burst_buffer.conf parameters: ValidateTimeout and OtherTimeout.
- See man page for details.
- -- Fix burst_buffer/cray support for interactive allocations >4GB.
- -- Correct backfill scheduling logic for job with INFINITE time limit.
- -- Fix issue on a scontrol reconfig all available GRES/TRES would be zeroed
- out.
- -- Set SLURM_HINT environment variable when --hint is used with sbatch or
- salloc.
- -- Add scancel -f/--full option to signal all steps including batch script and
- all of its child processes.
- -- Fix salloc -I to accept an argument.
- -- Avoid reporting more allocated CPUs than exist on a node. This can be
- triggered by resuming a previously suspended job, resulting in
- oversubscription of CPUs.
- -- Fix the pty window manager in slurmstepd not to retry IO operation with
- srun if it read EOF from the connection with it.
- -- sbatch --ntasks option to take precedence over --ntasks-per-node plus node
- count, as documented. Set SLURM_NTASKS/SLURM_NPROCS environment variables
- accordingly.
- -- MYSQL - Make sure suspended time is only subtracted from the CPU TRES
- as it is the only TRES that can be given to another job while suspended.
- -- Clarify how TRESBillingWeights operates on memory and burst buffers.
- * Changes in Slurm 15.08.1
- ==========================
- -- Fix test21.30 and 21.34 to check grpwall better.
- -- Add time to the partition QOS the job is running on instead of just the
- job QOS.
- -- Print usage for GrpJobs, GrpSubmitJobs and GrpWall even if there is no
- limit.
- -- If AccountingEnforce=safe is set make sure a job can finish before going
- over the limit with grpwall on a QOS or association.
- -- burst_buffer/cray - Major updates based upon recent Cray changes.
- -- Improve clean up logic of pmi2 plugin.
- -- Improve job state reason string when required nodes not available.
- -- Fix missing else when packing an update partition message
- -- Fix srun from inheriting the SLURM_CPU_BIND and SLURM_MEM_BIND environment
- variables when running in an existing srun (e.g. an srun within an salloc).
- -- Fix missing else when packing an update partition message.
- -- Use more flexible mechanism to find json installation.
- -- Make sure safe_limits was initialized before processing limits in the
- slurmctld.
- -- Fix for burst_buffer/cray to parse type option correctly.
- -- Fix memory error and version number in the nonstop plugin and reservation
- code.
- -- When requesting GRES in a step check for correct variable for the count.
- -- Fix issue with GRES in steps so that if you have multiple exclusive steps
- and you use all the GRES up instead of reporting the configuration isn't
- available you hold the requesting step until the GRES is available.
- -- MYSQL - Change debug to print out with DebugFlags=DB_Step instead of debug4
- -- Simplify code when user is selecting a job/step/array id and removed
- anomaly when only asking for 1 (task_id was never set to INFINITE).
- -- MYSQL - If user is requesting various task_ids only return requested steps.
- -- Fix issue when tres cnt for energy is 0 for total reported.
- -- Resolved scalability issues of power adaptive scheduling with layouts.
- -- Burst_buffer/cray bug - Fix teardown race condition that can result in
- infinite loop.
- -- Add support for --mail-type=NONE option.
- -- Job "--reboot" option automatically sets exclusive node mode.
- -- Fix memory leak when using PrologFlags=Alloc.
- -- Fix truncation of job reason in squeue.
- -- If a node is in DOWN or DRAIN state, leave it unavailable for allocation
- when powered down.
- -- Update the slurm.conf man page documenting better nohold_on_prolog_fail
- variable.
- -- Don't truncate task ID information in "squeue --array/-r" or "sview".
- -- Fix a bug which caused scontrol to core dump when releasing or
- holding a job by name.
- -- Fix unit conversion bug in slurmd which caused wrong memory calculation
- for cgroups.
- -- Fix issue with GRES in steps so that if you have multiple exclusive steps
- and you use all the GRES up instead of reporting the configuration isn't
- available you hold the requesting step until the GRES is available.
- -- Fix slurmdbd backup to use DbdAddr when contacting the primary.
- -- Fix error in MPI documentation.
- -- Fix to handle arrays with respect to number of jobs submitted. Previously
- only 1 job was accounted (against MaxSubmitJob) for when an array was
- submitted.
- -- Correct counting for job array limits, job count limit underflow possible
- when master cancellation of master job record.
- -- Combine 2 _valid_uid_gid functions into a single function to avoid
- diversion.
- -- Pending job array records will be combined into single line by default,
- even if started and requeued or modified.
- -- Fix sacct --format=nnodes to print out correct information for pending
- jobs.
- -- Make it so 'scontrol update job 1234 qos='' will set the qos back to
- the default qos for the association.
- -- Add [Alloc|Req]Nodes to sacct to be more like cpus.
- -- Fix sacct documentation about [Alloc|Req]TRES
- -- Put node count in TRES string for steps.
- -- Fix issue with wrong protocol version when using the srun --no-allocate
- option.
- -- Fix TRES counts on GRES on a clean start of the slurmctld.
- -- Add ability to change a job array's maximum running task count:
- "scontrol update jobid=# arraytaskthrottle=#"
- * Changes in Slurm 15.08.0
- ==========================
- -- Fix issue with frontend systems (outside ALPs or BlueGene) where srun
- wouldn't get the correct protocol version to launch a step.
- -- Fix for message aggregation return rpcs where none of the messages are
- intended for the head of the tree.
- -- Fix segfault in sreport when there was no response from the dbd.
- -- ALPS - Fix compile to not link against -ljob and -lexpat with every lib
- or binary.
- -- Fix testing for CR_Memory when CR_Memory and CR_ONE_TASK_PER_CORE are used
- with select/linear.
- -- When restarting or reconfiging the slurmctld, if job is completing handle
- accounting correctly to avoid meaningless errors about overflow.
- -- Add AccountingStorageTRES to scontrol show config
- -- MySQL - Fix minor memory leak if a connection ever goes away whilst using it.
- -- ALPS - Make it so srun --hint=nomultithread works correctly.
- -- Make MaxTRESPerUser work in sacctmgr.
- -- Fix handling of requeued jobs with steps that are still finishing.
- -- Cleaner copy for PriorityWeightTRES, it also fixes a core dump when trying
- to free it otherwise.
- -- Add environment variables SLURM_ARRAY_TASK_MAX, SLURM_ARRAY_TASK_MIN,
- SLURM_ARRAY_TASK_STEP for job arrays.
- -- Fix srun to use the NoInAddrAny TopologyParam option.
- -- Change QOS flag name from PartitionQOS to OverPartQOS to be a better
- description.
- -- Fix rpmbuild issue on Centos7.
- * Changes in Slurm 15.08.0rc1
- ==============================
- -- Added power_cpufreq layout.
- -- Make complete_batch_script RPC work with message aggregation.
- -- Do not count slurmctld threads waiting in a "throttle" lock against the
- daemon's thread limit as they are not contending for resources.
- -- Modify slurmctld outgoing RPC logic to support more parallel tasks (up to
- 85 RPCs and 256 pthreads; the old logic supported up to 21 RPCs and 256
- threads). This change can dramatically improve performance for RPCs
- operating on small node counts.
- -- Increase total backfill scheduler run time in stats_info_response_msg data
- structure from 32 to 64 bits in order to prevent overflow.
- -- Add NoInAddrAny option to TopologyParam in the slurm.conf which allows to
- bind to the interface of return of gethostname instead of any address on
- the node which avoids RSIP issues in Cray systems. This is most likely
- useful in other systems as well.
- -- Fix memory leak in Slurm::load_jobs perl api call.
- -- Added --noconvert option to sacct, sstat, squeue and sinfo which allows
- values to be displayed in their original unit types (e.g. 2048M won't be
- converted to 2G).
- -- Fix spelling of node_rescrs to node_resrcs in Perl API.
- -- Fix node state race condition, UNKNOWN->IDLE without configuration info.
- -- Cray: Disable LDAP references from slurmstepd on job launch for
- improved scalability.
- -- Remove srun "read header error" due to application termination race
- condition.
- -- Optimize sacct queries with additional db indexes.
- -- Add SLURM_TOPO_LEN env variable for scontrol show topology.
- -- Add free_mem to node information.
- -- Fix abort of batch launch if prolog is running, wait for prolog instead.
- -- Fix case where job would get the wrong cpu count when using
- --ntasks-per-core and --cpus-per-task together.
- -- Add TRESBillingWeights to partitions in slurm.conf which allows taking into
- consideration any TRES Type when calculating the usage of a job.
- -- Add PriorityWeightTRES slurm.conf option to be able to configure priority
- factors for TRES types.
- * Changes in Slurm 15.08.0pre6
- ==============================
- -- Add scontrol options to view and modify layouts tables.
- -- Add MsgAggregationParams which controls a reverse tree to the slurmctld
- which can be used to aggregate messages to the slurmctld into a single
- message to reduce communication to the slurmctld. Currently only epilog
- complete messages and node registration messages use this logic.
- -- Add sacct and squeue options to print trackable resources.
- -- Add sacctmgr option to display trackable resources.
- -- If an salloc or srun command is executed on a "front-end" configuration,
- that job will be assigned a slurmd shepherd daemon on the same host as used
- to execute the command when possible rather than an slurmd daemon on an
- arbitrary front-end node.
- -- Add srun --accel-bind option to control how tasks are bound to GPUs and NIC
- Generic RESources (GRES).
- -- gres/nic plugin modified to set OMPI_MCA_btl_openib_if_include environment
- variable based upon allocated devices (usable with OpenMPI and Mellanox).
- -- Make it so info options for srun/salloc/sbatch print with just 1 -v instead
- of 4.
- -- Add "no_backup_scheduling" SchedulerParameter to prevent jobs from being
- scheduled when the backup takes over. Jobs can be submitted, modified and
- cancelled while the backup is in control.
- -- Enable native Slurm backup controller to reside on an external Cray node
- when the "no_backup_scheduling" SchedulerParameter is used.
- -- Removed TICKET_BASED fairshare. Consider using the FAIR_TREE algorithm.
- -- Disable advanced reservation "REPLACE" option on IBM Bluegene systems.
- -- Add support for controlling distribution of tasks across cores (in addition
- to existing support for nodes and sockets, e.g. "block", "cyclic" or
- "fcyclic" task distribution at 3 levels in the hardware rather than 2).
- -- Create db index on <cluster>_assoc_table.acct. Deleting accounts that didn't
- have jobs in the job table could take a long time.
- -- The performance of Profiling with HDF5 is improved. In addition, internal
- structures are changed to make it easier to add new profile types,
- particularly energy sensors. sh5util will continue to work with either
- format.
- -- Add partition information to sshare output if the --partition option
- is specified on the sshare command line.
- -- Add sreport -T/--tres option to identify Trackable RESources (TRES) to
- report.
- -- Display job in sacct when single step's cpus are different from the job
- allocation.
- -- Add association usage information to "scontrol show cache" command output.
- -- MPI/MVAPICH plugin now requires Munge for authentication.
- -- job_submit/lua: Add default_qos fields. Add job record qos. Add partition
- record allow_qos and qos_char fields.
- * Changes in Slurm 15.08.0pre5
- ==============================
- -- Add jobcomp/elasticsearch plugin. Libcurl is required for build. Configure
- the server as follows: "JobCompLoc=http://YOUR_ELASTICSEARCH_SERVER:9200".
- -- Scancel logic largely re-written to better support job arrays.
- -- Added a slurm.conf parameter PrologEpilogTimeout to control how long
- prolog/epilog can run.
- -- Added TRES (Trackable resources) to track Mem, GRES, license, etc
- utilization.
- -- Add re-entrant versions of glibc time functions (e.g. localtime) to Slurm
- in order to eliminate rare deadlock of slurmstepd fork and exec calls.
- -- Constrain kernel memory (if available) in cgroups.
- -- Add PrologFlags option of "Contain" to create a proctrack container at
- job resource allocation time.
- -- Disable the OOM Killer in slurmd and slurmstepd's memory cgroup when using
- MemSpecLimit.
- * Changes in Slurm 15.08.0pre4
- ==============================
- -- Burst_buffer/cray - Convert logic to use new commands/API names (e.g.
- "dws_setup" rather than "bbs_setup").
- -- Remove the MinJobAge size limitation. It can now exceed 65533 as it
- is represented using an unsigned integer.
- -- Verify that all plugin version numbers are identical to the component
- attempting to load them. Without this verification, the plugin can reference
- Slurm functions in the caller which differ (e.g. the underlying function's
- arguments could have changed between Slurm versions).
- NOTE: All plugins (except SPANK) must be built against the identical
- version of Slurm in order to be used by any Slurm command or daemon. This
- should eliminate some very difficult to diagnose problems due to use of old
- plugins.
- -- Increase the MAX_PACK_MEM_LEN define to avoid PMI2 failure when fencing
- with large amount of ranks (to 1GB).
- -- Requests by normal user to reset a job priority (even to lower it) will
- result in an error saying to change the job's nice value instead.
- -- SPANK naming changes: For environment variables set using the
- spank_job_control_setenv() function, the values were available in the
- slurm_spank_job_prolog() and slurm_spank_job_epilog() functions using
- getenv where the name was given a prefix of "SPANK_". That prefix has
- been removed for consistency with the environment variables available in
- the Prolog and Epilog scripts.
- -- Major additions to the layouts framework code.
- -- Add "TopologyParam" configuration parameter. Optional value of "dragonfly"
- is supported.
- -- Optimize resource allocation for systems with dragonfly networks.
- -- Add "--thread-spec" option to salloc, sbatch and srun commands. This is
- the count of threads reserved for system use per node.
- -- job_submit/lua: Enable reading and writing job environment variables.
- For example: if (job_desc.environment.LANGUAGE == "en_US") then ...
- -- Added two new APIs slurm_job_cpus_allocated_str_on_node_id()
- and slurm_job_cpus_allocated_str_on_node() to print the CPU ids
- allocated to a job.
- -- Specialized memory (a node's MemSpecLimit configuration parameter) is not
- available for allocation to jobs.
- -- Modify scontrol update job to allow jobid specification without
- the = sign. 'scontrol update job=123 ...' and 'scontrol update job 123 ...'
- are both valid syntax.
- -- Archive a month at a time when there are lots of records to archive.
- -- Introduce new sbatch option '--kill-on-invalid-dep=yes|no' which allows
- users to specify which behavior they want if a job dependency is not
- satisfied.
- -- Add Slurmdb::qos_get() interface to perl api.
- -- If a job fails to start set the requeue reason to be:
- job requeued in held state.
- -- Implemented a new MPI key,value PMIX_RING() exchange algorithm as
- an alternative to PMI2.
- -- Remove possible deadlocks in the slurmctld when the slurmdbd is busy
- archiving/purging.
- -- Add DB_ARCHIVE debug flag for filtering out debug messages in the slurmdbd
- when the slurmdbd is archiving/purging.
- -- Fix some power_save mode issues: Parsing of SuspendTime in slurm.conf was
- bad, powered down nodes would get set non-responding if there was an
- in-flight message, and permit nodes to be powered down from any state.
- -- Initialize variables in consumable resource plugin to prevent core dump.
- * Changes in Slurm 15.08.0pre3
- ==============================
- -- CRAY - addition of acct_gather_energy/cray plugin.
- -- Add job credential to "Run Prolog" RPC used with a configuration of
- PrologFlags=alloc. This allows the Prolog to be passed identification of
- GPUs allocated to the job.
- -- Add SLURM_JOB_CONSTRAINTS to environment variables available to the Prolog.
- -- Added "--mail=stage_out" option to job submission commands to notify user
- when burst buffer stage out is complete.
- -- Require a "Reason" when using scontrol to set a node state to DOWN.
- -- Mail notifications on job BEGIN, END and FAIL now apply to a job array as a
- whole rather than generating individual email messages for each task in the
- job array.
- -- task/affinity - Fix memory binding to NUMA with cpusets.
- -- Display job's estimated NodeCount based off of partition's configured
- resources rather than the whole system's.
- -- Add AuthInfo option of "cred_expire=#" to specify the lifetime of a job
- step credential. The default value was changed from 1200 to 120 seconds.
- -- Set the delay time for job requeue to the job credential lifetime (120
- seconds by default). This ensures that prolog runs on every node when a
- job is requeued. (This change will slow down launch of re-queued jobs).
- -- Add AuthInfo option of "cred_expire=#" to specify the lifetime of a job
- step credential.
- -- Remove srun --max-launch-time option. The option has not been functional
- since Slurm version 2.0.
- -- Add sockets and cores to TaskPluginParams' autobind option.
- -- Added LaunchParameters configuration parameter. Have srun command test
- locally for the executable file if LaunchParameters=test_exec or the
- environment variable SLURM_TEST_EXEC is set. Without this an invalid
- command will generate one error message per task launched.
- -- Fix the slurm /etc/init.d script to return 0 upon stopping the
- daemons and return 1 in case of failure.
- -- Add the ability for a compute node to be allocated to multiple jobs, but
- restricted to a single user. Added "--exclusive=user" option to salloc,
- sbatch and srun commands. Added "owner" field to node record, visible using
- the scontrol and sview commands. Added new partition configuration parameter
- "ExclusiveUser=yes|no".
- * Changes in Slurm 15.08.0pre2
- ==============================
- -- Add the environment variables SLURM_JOB_ACCOUNT, SLURM_JOB_QOS
- and SLURM_JOB_RESERVATION in the batch/srun jobs.
- -- Add sview burst buffer display.
- -- Properly enforce partition Shared=YES option. Previously oversubscribing
- resources required gang scheduling to be configured.
- -- Enable per-partition gang scheduling resource resolution (e.g. the partition
- can have SelectTypeParameters=CR_CORE, while the global value is CR_SOCKET).
- -- Make it so a newer version of a slurmstepd can talk to an older srun.
- allocation. Nodes could have been added while waiting for an allocation.
- -- Expanded --cpu-freq parameters to include min-max:governor specifications.
- --cpu-freq now supported on salloc and sbatch.
- -- Add support for optimized job allocations with respect to SGI Hypercube
- topology.
- NOTE: Only supported with select/linear plugin.
- NOTE: The program contribs/sgi/netloc_to_topology can be used to build
- Slurm's topology.conf file.
- -- Remove 64k validation of incoming RPC nodelist size. Validated at 64MB
- when unpacking.
- -- In slurmstepd() add the user primary group if it is not part of the
- groups sent from the client.
- -- Added BurstBuffer field to advanced reservations.
- -- For advanced reservation, replace flag "License_only" with flag "Any_Nodes".
- It can be used to indicate that an advanced reservation's resources (licenses
- and/or burst buffers) can be used with any compute nodes.
- -- Allow users to specify the srun --resv-ports as 0 in which case no ports
- will be reserved. The default behaviour is to allocate one port per task.
- -- Interpret a partition configuration of "Nodes=ALL" in slurm.conf as
- including all nodes defined in the cluster.
- -- Added new configuration parameters PowerParameters and PowerPlugin.
- -- Added power management plugin infrastructure.
- -- If job already exceeded one of its QOS/Accounting limits do not
- return error if user modifies QOS unrelated job settings.
- -- Added DebugFlags value of "Power".
- -- When caching user ids of AllowGroups use both getgrnam_r() and getgrent_r()
- then remove eventual duplicate entries.
- -- Remove rpm dependency between slurm-pam and slurm-devel.
- -- Remove support for the XCPU (cluster management) package.
- -- Add Slurmdb::jobs_get() interface to perl api.
- -- Performance improvement when sending data from srun to stepds when
- processing fencing.
- -- Add the feature to specify arbitrary field separator when running
- sacct -p or sacct -P. The command line option is --separator.
- -- Introduce slurm.conf parameter to use Proportional Set Size (PSS) instead
- of RSS to determine the memory footprint of a job.
- Add a slurm.conf option not to kill jobs that are over their memory limit.
- -- Add job submission command options: --sicp (available for inter-cluster
- dependencies) and --power (specify power management options) to salloc,
- sbatch, and srun commands.
- -- Add DebugFlags option of SICP (inter-cluster option logging).
- -- In order to support inter-cluster job dependencies, the MaxJobID
- configuration parameter default value has been reduced from 4,294,901,760
- to 2,147,418,112 and its maximum value is now 2,147,463,647.
- ANY JOBS WITH A JOB ID ABOVE 2,147,463,647 WILL BE PURGED WHEN SLURM IS
- UPGRADED FROM AN OLDER VERSION!
- -- Add QOS name to the output of a partition in squeue/scontrol/sview/smap.
- * Changes in Slurm 15.08.0pre1
- ==============================
- -- Add sbcast support for file transfer to resources allocated to a job step
- rather than a job allocation.
- -- Change structures with association in them to assoc to save space.
- -- Add support for job dependencies joined with OR operator (e.g.
- "--depend=afterok:123?afternotok:124").
- -- Add "--bb" (burst buffer specification) option to salloc, sbatch, and srun.
- -- Added configuration parameters BurstBufferParameters and BurstBufferType.
- -- Added burst_buffer plugin infrastructure (needs many more functions).
- -- Make it so when the fanout logic comes across a node that is down we abandon
- the tree to avoid worst case scenarios when the entire branch is down and
- we have to try each serially.
- -- Add better error reporting of invalid partitions at submission time.
- -- Move will-run test for multiple clusters from the sbatch code into the API
- so that it can be used with DRMAA.
- -- If a non-exclusive allocation requests --hint=nomultithread on a
- CR_CORE/SOCKET system lay out tasks correctly.
- -- Avoid including unused CPUs in a job's allocation when cores or sockets are
- allocated.
- -- Added new job state of STOPPED indicating processes have been stopped with a
- SIGSTOP (using scancel or sview), but retain its allocated CPUs. Job state
- returns to RUNNING when SIGCONT is sent (also using scancel or sview).
- -- Added EioTimeout parameter to slurm.conf. It is the number of seconds srun
- waits for slurmstepd to close the TCP/IP connection used to relay data
- between the user application and srun when the user application terminates.
- -- Remove slurmctld/dynalloc plugin as the work was never completed, so it is
- not worth the effort of continued support at this time.
- -- Remove DynAllocPort configuration parameter.
- -- Add advance reservation flag of "replace" that causes allocated resources
- to be replaced with idle resources. This maintains a pool of available
- resources that maintains a constant size (to the extent possible).
- -- Added SchedulerParameters option of "bf_busy_nodes". When selecting
- resources for pending jobs to reserve for future execution (i.e. the job
- can not be started immediately), then preferentially select nodes that are
- in use. This will tend to leave currently idle resources available for
- backfilling longer running jobs, but may result in allocations having less
- than optimal network topology. This option is currently only supported by
- the select/cons_res plugin.
- -- Permit "SuspendTime=NONE" as slurm.conf value rather than only a numeric
- value to match "scontrol show config" output.
- -- Add the 'scontrol show cache' command which displays the associations
- in slurmctld.
- -- Test more frequently for node boot completion before starting a job.
- Provides better responsiveness.
- -- Fix PMI2 singleton initialization.
- -- Permit PreemptType=qos and PreemptMode=suspend,gang to be used together.
- A high-priority QOS job will now oversubscribe resources and gang schedule,
- but only if there are insufficient resources for the job to be started
- without preemption. NOTE: That with PreemptType=qos, the partition's
- Shared=FORCE:# configuration option will permit one job more per resource
- to be run than specified, but only if started by preemption.
- -- Remove the CR_ALLOCATE_FULL_SOCKET configuration option. It is now the
- default.
- -- Fix a race condition in PMI2 when fencing counters can be out of sync.
- -- Increase the MAX_PACK_MEM_LEN define to avoid PMI2 failure when fencing
- with large amount of ranks.
- -- Add QOS option to a partition. This will allow a partition to have
- all the limits a QOS has. If a limit is set in both QOS the partition
- QOS will override the job's QOS unless the job's QOS has the
- OverPartQOS flag set.
- -- The task_dist_states variable has been split into "flags" and "base"
- components. Added SLURM_DIST_PACK_NODES and SLURM_DIST_NO_PACK_NODES values
- to give user greater control over task distribution. The srun --dist options
- has been modified to accept a "Pack" and "NoPack" option. These options can
- be used to override the CR_PACK_NODE configuration option.
- * Changes in Slurm 14.11.12
- ===========================
- -- Correct dependency formatting to print array task ids if set.
- -- Fix for configuration of "AuthType=munge" and "AuthInfo=socket=..." with
- alternate munge socket path.
- -- BGQ - Remove redeclaration of job_read_lock.
- -- BGQ - Tighter locks around structures when nodes/cables change state.
- -- Fix job array formatting to allow return [0-100:2] display for arrays with
- step functions rather than [0,2,4,6,8,...] .
- -- Associations - prevent hash table corruption if uid initially unset for
- a user, which can cause slurmctld to crash if that user is deleted.
- -- Add cast to memory limit calculation to prevent integer overflow for
- very large memory values.
- -- Fix test cases to have proper int return signature.
- * Changes in Slurm 14.11.11
- ===========================
- -- Fix systemd's slurmd service from killing slurmstepds on shutdown.
- -- Fix the qstat wrapper when user is removed from the system but still
- has running jobs.
- -- Log the request to terminate a job at info level if DebugFlags includes
- the Steps keyword.
- -- Fix potential memory corruption in _slurm_rpc_epilog_complete as well as
- _slurm_rpc_complete_job_allocation.
- -- Fix incorrectly sized buffer used by jobid2str which will cause buffer
- overflow in slurmctld. (Bug 2295.)
- * Changes in Slurm 14.11.10
- ===========================
- -- Fix truncation of job reason in squeue.
- -- If a node is in DOWN or DRAIN state, leave it unavailable for allocation
- when powered down.
- -- Update the slurm.conf man page documenting better nohold_on_prolog_fail
- variable.
- -- Don't truncate task ID information in "squeue --array/-r" or "sview".
- -- Fix a bug which caused scontrol to core dump when releasing or
- holding a job by name.
- -- Fix unit conversion bug in slurmd which caused wrong memory calculation
- for cgroups.
- -- Fix issue with GRES in steps so that if you have multiple exclusive steps
- and you use all the GRES up instead of reporting the configuration isn't
- available you hold the requesting step until the GRES is available.
- -- Fix slurmdbd backup to use DbdAddr when contacting the primary.
- -- Fix error in MPI documentation.
- -- Fix to handle arrays with respect to number of jobs submitted. Previously
- only 1 job was accounted (against MaxSubmitJob) for when an array was
- submitted.
- -- Correct counting for job array limits, job count limit underflow possible
- upon cancellation of the master job record.
- -- For pending jobs have sacct print 0 for nnodes instead of the bogus 2.
- -- Fix for tracking node state when jobs that have been allocated exclusive
- access to nodes (i.e. entire nodes) and later relinquish some nodes. Nodes
- would previously appear partly allocated and prevent use by other jobs.
- -- Fix updating job in db after extending job's timelimit past partition's
- timelimit.
- -- Fix srun -I<timeout> from flooding the controller with step create requests.
- -- Requeue/hold batch job launch request if job already running (possible if
- node went to DOWN state, but jobs remained active).
- -- If a job's CPUs/task ratio is increased due to configured MaxMemPerCPU,
- then increase its allocated CPU count in order to enforce CPU limits.
- -- Don't mark powered down node as not responding. This could be triggered by
- race condition of the node suspend and ping logic.
- -- Don't requeue RPC going out from slurmctld to DOWN nodes (can generate
- repeating communication errors).
- -- Propagate sbatch "--dist=plane=#" option to srun.
- -- Fix sacct to not return all jobs if the -j option is given with a trailing
- ','.
- -- Permit job_submit plugin to set a job's priority.
- -- Fix occasional srun segfault.
- -- Fix issue with sacct, printing 0_0 for arrays that had finished in the
- database but the start record hadn't made it yet.
- -- Fix sacct -j, (nothing but a comma) to not return all jobs.
- -- Prevent slurmstepd from core dumping if /proc/<pid>/stat has
- unexpected format.
- * Changes in Slurm 14.11.9
- ==========================
- -- Correct "sdiag" backfill cycle time calculation if it yields locks. A
- microsecond value was being treated as a second value resulting in an
- overflow in the calculation.
- -- Fix segfault when updating timelimit on jobarray task.
- -- Fix to job array update logic that can result in a task ID of 4294967294.
- -- Fix of job array update, previous logic could fail to update some tasks
- of a job array for some fields.
- -- CRAY - Fix seg fault if a blade is replaced and slurmctld is restarted.
- -- Fix plane distribution to allocate in blocks rather than cyclically.
- -- squeue - Remove newline from job array ID value printed.
- -- squeue - Enable filtering for job state SPECIAL_EXIT.
- -- Prevent job array task ID being inappropriately set to NO_VAL.
- -- MYSQL - Make it so you don't have to restart the slurmctld
- to gain the correct limit when a parent account is root and you
- remove a subaccount's limit which exists on the parent account.
- -- MYSQL - Close chance of setting the wrong limit on an association
- when removing a limit from an association on multiple clusters
- at the same time.
- -- MYSQL - Fix minor memory leak when modifying an association but no
- change was made.
- -- srun command line of either --mem or --mem-per-cpu will override both the
- SLURM_MEM_PER_CPU and SLURM_MEM_PER_NODE environment variables.
- -- Prevent slurmctld abort on update of advanced reservation that contains no
- nodes.
- -- ALPS - Revert commit 2c95e2d22 which also removes commit 2e2de6a4 allowing
- cray with the SubAllocate option to work as it did with 2.5.
- -- Properly parse CPU frequency data on POWER systems.
- -- Correct sacct.a man pages describing -i option.
- -- Capture salloc/srun information in sdiag statistics.
- -- Fix bug in node selection with topology optimization.
- -- Don't set distribution when srun requests 0 memory.
- -- Read in correct number of nodes from SLURM_HOSTFILE when specifying nodes
- and --distribution=arbitrary.
- -- Fix segfault in Bluegene setups where RebootQOSList is defined in
- bluegene.conf and accounting is not setup.
- -- MYSQL - Update mod_time when updating a start job record or adding one.
- -- MYSQL - Fix issue where if an association id ever changes on at least a
- portion of a job array is pending after its initial start in the
- database it could create another row for the remain array instead
- of using the already existing row.
- -- Fix scheduling anomaly with job arrays submitted to multiple partitions,
- jobs could be started out of priority order.
- -- If a host has suspended jobs do not reboot it. Reboot only hosts
- with no jobs in any state.
- -- ALPS - Fix issue when using --exclusive flag on srun to do the correct
- thing (-F exclusive) instead of -F share.
- -- Fix various memory leaks in the Perl API.
- -- Fix a bug in the controller which displayed jobs in CF state as RUNNING.
- -- Preserve advanced _core_ reservation when nodes added/removed/resized on
- slurmctld restart. Rebuild core_bitmap as needed.
- -- Fix for non-standard Munge port location for srun/pmi use.
- -- Fix gang scheduling/preemption issue that could cancel job at startup.
- -- Fix a bug in squeue which prevented squeue -tPD to print array jobs.
- -- Sort job arrays in job queue according to array_task_id when priorities are
- equal.
- -- Fix segfault in sreport when there was no response from the dbd.
- -- ALPS - Fix compile to not link against -ljob and -lexpat with every lib
- or binary.
- -- Fix testing for CR_Memory when CR_Memory and CR_ONE_TASK_PER_CORE are used
- with select/linear.
- -- MySQL - Fix minor memory leak if a connection ever goes away whilst using it.
- -- ALPS - Make it so srun --hint=nomultithread works correctly.
- -- Prevent job array task ID from being reported as NO_VAL if last task in the
- array gets requeued.
- -- Fix some potential deadlock issues when state files don't exist in the
- association manager.
- -- Correct RebootProgram logic when executed outside of a maintenance
- reservation.
- -- Requeue job if possible when slurmstepd aborts.
- * Changes in Slurm 14.11.8
- ==========================
- -- Eliminate need for user to set user_id on job_update calls.
- -- Correct list of unavailable nodes reported in a job's "reason" field when
- that job can not start.
- -- Map job --mem-per-cpu=0 to --mem=0.
- -- Fix squeue -o %m and %d unit conversion to Megabytes.
- -- Fix issue with incorrect time calculation in the priority plugin when
- a job runs past its time limit.
- -- Prevent users from setting job's partition to an invalid partition.
- -- Fix sreport core dump when requesting
- 'job SizesByAccount grouping=individual'.
- -- select/linear: Correct count of CPUs allocated to job on system with
- hyperthreads.
- -- Fix race condition where last array task might not get updated in the db.
- -- CRAY - Remove libpmi from rpm install
- -- Fix squeue -o %X output to correctly handle NO_VAL and suffix.
- -- When deleting a job from the system set the job_id to 0 to avoid memory
- corruption if thread uses the pointer basing validity off the id.
- -- Fix issue where sbatch would set ntasks-per-node to 0 making any srun
- afterward cause a divide by zero error.
- -- switch/cray: Refine logic to set PMI_CRAY_NO_SMP_ENV environment variable.
- -- When sacctmgr loads archives with version less than 14.11 set the array
- task id to NO_VAL, so sacct can display the job ids correctly.
- -- When using memory cgroup if a task uses more memory than requested
- the failures are logged into memory.failcnt count file by cgroup
- and the user is notified by slurmstepd about it.
- -- Fix scheduling inconsistency with GRES bound to specific CPUs.
- -- If user belongs to a group which has split entries in /etc/group
- search for its username in all groups.
- -- Do not consider nodes explicitly powered up as DOWN with reason of "Node
- unexpectedly rebooted".
- -- Use correct slurmd spooldir when creating cpu-frequency locks.
- -- Note that TICKET_BASED fairshare will be deprecated in the future. Consider
- using the FAIR_TREE algorithm instead.
- -- Set job's reason to BadConstraints when job can't run on any node.
- -- Prevent abort on update of reservation with no nodes (licenses only).
- -- Prevent slurmctld from dumping core if job_resrcs is missing in the
- job data structure.
- -- Fix squeue to print array task ids according to man page when
- SLURM_BITSTR_LEN is defined in the environment.
- -- In squeue, sort jobs based on array job ID if available.
- -- Fix the calculation of job energy by not including the NO_VAL values.
- -- Advanced reservation fixes: enable update of bluegene reservation, avoid
- abort on multi-core reservations.
- -- Set the totalview_stepid to the value of the job step instead of NO_VAL.
- -- Fix slurmdbd core dump if the daemon does not have connection with
- the database.
- -- Display error message when attempting to modify priority of a held job.
- -- Backfill scheduler: The configured backfill_interval value (default 30
- seconds) is now interpreted as a maximum run time for the backfill
- scheduler. Once reached, the scheduler will build a new job queue and
- start over, even if not all jobs have been tested.
- -- Backfill scheduler now considers OverTimeLimit and KillWait configuration
- parameters to estimate when running jobs will exit.
- -- Correct task layout with CR_Pack_Node option and more than 1 CPU per task.
- -- Fix the scontrol man page describing the release argument.
- -- When job QOS is modified, do so before attempting to change partition in
- order to validate the partition's Allow/DenyQOS parameter.
- * Changes in Slurm 14.11.7
- ==========================
- -- Initialize some variables used with the srun --no-alloc option that may
- cause random failures.
- -- Add SchedulerParameters option of sched_min_interval that controls the
- minimum time interval between any job scheduling action. The default value
- is zero (disabled).
- -- Change default SchedulerParameters=max_sched_time from 4 seconds to 2.
- -- Refactor scancel so that all pending jobs are cancelled before starting
- cancellation of running jobs. Otherwise they happen in parallel and the
- pending jobs can be scheduled on resources as the running jobs are being
- cancelled.
- -- ALPS - Add new cray.conf variable NoAPIDSignalOnKill. When set to yes this
- will make it so the slurmctld will not signal the apid's in a batch job.
- Instead it relies on the rpc coming from the slurmctld to kill the job to
- end things correctly.
- -- ALPS - Have the slurmstepd running a batch job wait for an ALPS release
- before ending the job.
- -- Initialize variables in consumable resource plugin to prevent core dump.
- -- Fix scancel bug which could return an error on attempt to signal a job step.
- -- In slurmctld communication agent, make the thread timeout be the configured
- value of MessageTimeout rather than 30 seconds.
- -- sshare -U/--Users only flag was used uninitialized.
- -- Cray systems, add "plugstack.conf.template" sample SPANK configuration file.
- -- BLUEGENE - Set DB2NOEXITLIST when starting the slurmctld daemon to avoid
- random crashing in db2 when the slurmctld is exiting.
- -- Make full node reservations display correctly the core count instead of
- cpu count.
- -- Preserve original errno on execve() failure in task plugin.
- -- Add SLURM_JOB_NAME env variable to an salloc's environment.
- -- Overwrite SLURM_JOB_NAME in an srun when it gets an allocation.
- -- Make sure each job has a wckey if that is something that is tracked.
- -- Make sure old step data is cleared when job is requeued.
- -- Load libtinfo as needed when building ncurses tools.
- -- Fix small memory leak in backup controller.
- -- Fix segfault when backup controller takes control for second time.
- -- Cray - Fix backup controller running native Slurm.
- -- Provide prototypes for init_setproctitle()/fini_setproctitle on NetBSD.
- -- Add configuration test to find out the full path to su command.
- -- preempt/job_prio plugin: Fix for possible infinite loop when identifying
- preemptable jobs.
- -- preempt/job_prio plugin: Implement the concept of Warm-up Time here. Use
- the QoS GraceTime as the amount of time to wait before preempting.
- Basically, skip preemption if your time is not up.
- -- Make srun wait KillWait time when a task is cancelled.
- -- switch/cray: Revert logic added to 14.11.6 that set "PMI_CRAY_NO_SMP_ENV=1"
- if CR_PACK_NODES is configured.
- * Changes in Slurm 14.11.6
- ==========================
- -- If SchedulerParameters value of bf_min_age_reserve is configured, then
- a newly submitted job can start immediately even if there is a higher
- priority non-runnable job which has been waiting for less time than
- bf_min_age_reserve.
- -- qsub wrapper modified to export "all" with -V option
- -- RequeueExit and RequeueExitHold configuration parameters modified to accept
- numeric ranges. For example "RequeueExit=1,2,3,4" and "RequeueExit=1-4" are
- equivalent.
- -- Correct the job array specification parser to accept brackets in job array
- expression (e.g. "123_[4,7-9]").
- -- Fix for misleading job submit failure errors sent to users. Previous error
- could indicate why specific nodes could not be used (e.g. too small memory)
- when other nodes could be used, but were not for another reason.
- -- Fix squeue --array to display correctly the array elements when the
- % separator is specified at the array submission time.
- -- Fix priority from not being calculated correctly due to memory issues.
- -- Fix a transient pending reason 'JobId=job_id has invalid QOS'.
- -- A non-administrator change to job priority will not be persistent except
- for holding the job. Users wanting to change a job priority on a persistent
- basis should reset their "nice" value.
- -- Print buffer sizes as unsigned values when failed to pack messages.
- -- Fix race condition where sprio would print factors without weights applied.
- -- Document the sacct option JobIDRaw which for arrays prints the jobid instead
- of the arrayTaskId.
- -- Allow users to modify MinCPUsNode, MinMemoryNode and MinTmpDiskNode of
- their own jobs.
- -- Increase the jobid print field in SQUEUE_FORMAT in
- opt_modulefiles_slurm.in.
- -- Enable compiling without optimizations and with debugging symbols by
- default. Disable this by configuring with --disable-debug.
- -- job_submit/lua plugin: Add mail_type and mail_user fields.
- -- Correct output message from sshare.
- -- Use standard statvfs(2) syscall if available, in preference to
- non-standard statfs.
- -- Add a new option -U/--Users to sshare to display only users
- information, parent and ancestors are not printed.
- -- Purge 50000 records at a time so that locks can be released periodically.
- -- Fix potentially uninitialized variables
- -- ALPS - Fix issue where a frontend node could become unresponsive and never
- added back into the system.
- -- Gate epilog complete messages as done with other messages
- -- If we have more than a certain number of agents (50) wait longer when gating
- rpcs.
- -- FrontEnd - ping non-responding or down nodes.
- -- switch/cray: If CR_PACK_NODES is configured, then set the environment
- variable "PMI_CRAY_NO_SMP_ENV=1"
- -- Fix invalid memory reference in SlurmDBD when putting a node up.
- -- Allow opening of plugstack.conf even when a symlink.
- -- Fix scontrol reboot so that rebooted nodes will not be set down with reason
- 'Node xyz unexpectedly rebooted' but will be correctly put back to service.
- -- CRAY - Throttle the post NHC operations as to not hog the job write lock
- if many steps/jobs finish at once.
- -- Disable changes to GRES count while jobs are running on the node.
- -- CRAY - Fix issue with scontrol reconfig.
- -- slurmd: Remove wrong reporting of "Error reading step ... memory limit".
- The logic was treating success as an error.
- -- Eliminate "Node ping apparently hung" error messages.
- -- Fix average CPU frequency calculation.
- -- When allocating resources with resolution of sockets, charge the job for all
- CPUs on allocated sockets rather than just the CPUs on used cores.
- -- Prevent slurmdbd error if cluster added or removed while rollup in progress.
- Removing a cluster can cause slurmdbd to abort. Adding a cluster can cause
- the slurmdbd rollup to hang.
- -- sview - When right clicking on a tab make sure we don't display the page
- list, but only the column list.
- -- FRONTEND - If doing a clean start make sure the nodes are brought up in the
- database.
- -- MySQL - Fix issue when using the TrackSlurmctldDown and nodes are down at
- the same time, don't double bill the down time.
- -- MySQL - Various memory leak fixes.
- -- sreport - Fix Energy displays
- -- Fix node manager logic to keep unexpectedly rebooted node in state
- NODE_STATE_DOWN even if already down when rebooted.
- -- Fix for array jobs submitted to multiple partitions not starting.
- -- CRAY - Enable ALPs mpp compatibility code in sbatch for native Slurm.
- -- ALPS - Move basil_inventory to less confusing function.
- -- Add SchedulerParameters option of "sched_max_job_start=" to limit the
- number of jobs that can be started in any single execution of the main
- scheduling logic.
- -- Fixed compiler warnings generated by gcc version >= 4.6.
- -- sbatch to stop parsing script for "#SBATCH" directives after first command,
- which matches the documentation.
- -- Overwrite the SLURM_JOB_NAME in sbatch if already exist in the environment
- and use the one specified on the command line --job-name.
- -- Remove xmalloc_nz from unpack functions. If the unpack ever failed the
- free afterwards would not have zeroed out memory on the variables that
- didn't get unpacked.
- -- Improve database interaction from controller.
- -- Fix for data shift when loading job archives.
- -- ALPS - Added new SchedulerParameters=inventory_interval to specify how
- often an inventory request is handled.
- -- ALPS - Don't run a release on a reservation on the slurmctld for a batch
- job. This is already handled on the stepd when the script finishes.
- * Changes in Slurm 14.11.5
- ==========================
- -- Correct the squeue command taking into account that a node can
- have NULL name if it is not in DNS but still in slurm.conf.
- -- Fix slurmdbd regression which would cause a segfault when a node is set
- down with no reason.
- -- BGQ - Fix issue with job arrays not being handled correctly
- in the runjob_mux plugin.
- -- Print FAIR_TREE, if configured, in "scontrol show config" output for
- PriorityFlags.
- -- Add SLURM_JOB_GPUS environment variable to those available in the Prolog.
- -- Load lua-5.2 library if using lua5.2 for lua job submit plugin.
- -- GRES logic: Prevent bad node_offset due to not preserving no_consume flag.
- -- Fix wrong variables used in the wrapper functions needed for systems that
- don't support strong_alias
- -- Fix code for apple computers SOL_TCP is not defined
- -- Cray/BASIL - Check for mysql credentials in /root/.my.cnf.
- -- Fix sprio showing wrong priority for job arrays until priority is
- recalculated.
- -- Account to batch step all CPUs that are allocated to a job not
- just one since the batch step has access to all CPUs like other steps.
- -- Fix job getting EligibleTime set before meeting dependency requirements.
- -- Correct the initialization of QOS MinCPUs per job limit.
- -- Set the debug level of information messages in cgroup plugin to debug2.
- -- For job running under a debugger, if the exec of the task fails, then
- cancel its I/O and abort immediately rather than waiting 60 seconds for
- I/O timeout.
- -- Fix associations not getting default qos set until after a restart.
- -- Set the value of total_cpus not to be zero before invoking
- acct_policy_job_runnable_post_select.
- -- MySQL - When requesting cluster resources, only return resources for the
- cluster(s) requested.
- -- Add TaskPluginParam=autobind=threads option to set a default binding in the
- case that "auto binding" doesn't find a match.
- -- Introduce a new SchedulerParameters variable nohold_on_prolog_fail.
- If configured don't requeue jobs on hold if a Prolog fails.
- -- Make it so sched_params isn't read over and over when an epilog complete
- message comes in
- -- Fix squeue -L <licenses> not filtering out jobs with licenses.
- -- Changed the implementation of xcpuinfo_abs_to_mac() to be identical to
- _abs_to_mac() to fix CPUs allocation using cpuset cgroup.
- -- Improve the explanation of the unbuffered feature in the
- srun man page.
- -- Make taskplugin=cgroup work for core spec. needed to have task/cgroup
- before.
- -- Fix reports not using the month usage table.
- -- BGQ - Sanity check given for translating small blocks into slurm bg_records.
- -- Fix bug preventing the requeue/hold or requeue/special_exit of job from the
- completing state.
- -- Cray - Fix for launching batch step within an existing job allocation.
- -- Cray - Add ALPS_APP_ID_ENV environment variable.
- -- Increase maximum MaxArraySize configuration parameter value from 1,000,001
- to 4,000,001.
- -- Added new SchedulerParameters value of bf_min_age_reserve. The backfill
- scheduler will not reserve resources for pending jobs until they have
- been pending for at least the specified number of seconds. This can be
- valuable if jobs lack time limits or all time limits have the same value.
- -- Fix support for --mem=0 (all memory of a node) with select/cons_res plugin.
- -- Fix bug that can permit someone to kill job array belonging to another user.
- -- Don't set the default partition on a license only reservation.
- -- Show a NodeCnt=0, instead of NO_VAL, in "scontrol show res" for a license
- only reservation.
- -- BGQ - When using static small blocks make sure when clearing the job the
- block is set up to its original state.
- -- Start job allocation using lowest numbered sockets for block task
- distribution for consistency with cyclic distribution.
- * Changes in Slurm 14.11.4
- ==========================
- -- Make sure assoc_mgr locks are initialized correctly.
- -- Correct check of enforcement when filling in an association.
- -- Make sacctmgr print out classification correctly for clusters.
- -- Add array_task_str to the perlapi job info.
- -- Fix for slurmctld abort with GRES types configured and no CPU binding.
- -- Fix for GRES scheduling where count > 1 per topology type (or GRES types).
- -- Make CR_ONE_TASK_PER_CORE work correctly with task/affinity.
- -- job_submit/pbs - Fix possible deadlock.
- -- job_submit/lua - Add "alloc_node" to job information available.
- -- Fix memory leak in mysql accounting when usage rollup happens.
- -- If users specify ALL together with other variables using the
- --export sbatch/srun command line option, propagate the users'
- environ to the execution side.
- -- Fix job array scheduling anomaly that can stop scheduling of valid tasks.
- -- Fix perl api tests for libslurmdb to work correctly.
- -- Remove some misleading logs related to non-consumable GRES.
- -- Allow --ignore-pbs to take effect when read as an #SBATCH argument.
- -- Fix Slurmdb::clusters_get() in perl api from not returning information.
- -- Fix TaskPluginParam=Cpusets from logging error message about not being able
- to remove cpuset dir which was already removed by the release_agent.
- -- Fix sorting by time left in squeue.
- -- Fix the file name substitution for job stderr when %A, %a %j and %u
- are specified.
- -- Remove minor warning when compiling slurmstepd.
- -- Fix database resources so they can add new clusters to them after they have
- initially been added.
- -- Use the slurm_getpwuid_r wrapper of getpwuid_r to handle possible
- interrupts.
- -- Correct the scontrol man page and command listing which node states can
- be set by the command.
- -- Stop sacct from printing non-existent stat information for
- Front End systems.
- -- Correct srun and acct_gather.conf man pages, mention Filesystem instead
- of Lustre.
- -- When a job using multiple partitions starts, send to slurmdbd only
- the partition in which the job runs.
- -- ALPS - Fix depth for MemoryAllocation in BASIL with CLE 5.2.3.
- -- Fix assoc_mgr hash to deal with users that don't have a uid yet when making
- reservations.
- -- When a job uses multiple partitions, set the environment variable
- SLURM_JOB_PARTITION to be the one in which the job started.
- -- Print spurious message about the absence of cgroup.conf at log level debug2
- instead of info.
- -- Enable CUDA v7.0+ use with a Slurm configuration of TaskPlugin=task/cgroup
- ConstrainDevices=yes (in cgroup.conf). With that configuration
- CUDA_VISIBLE_DEVICES will start at 0 rather than the device number.
- -- Fix job array logic that can cause slurmctld to abort.
- -- Report job "shared" field properly in scontrol, squeue, and sview.
- -- If a job is requeued because of RequeueExit or RequeueExitHold send event
- REQUEUED to slurmdbd.
- -- Fix build if hwloc is in non-standard location.
- -- Fix slurmctld job recovery logic which could cause the last task in a job
- array to be lost.
- -- Fix slurmctld initialization problem which could cause requeue of the last
- task in a job array to fail if executed prior to the slurmctld loading
- the maximum size of a job array into a variable in the job_mgr.c module.
- -- Fix fatal in controller when deleting a user association of a user which
- had been previously removed from the system.
- -- MySQL - If a node state and reason are the same on a node state change
- don't insert a new row in the event table.
- -- Fix issue with "sreport cluster AccountUtilizationByUser" when using
- PrivateData=users.
- -- Fix perlapi tests for libslurm perl module.
- -- MySQL - Fix potential issue when PrivateData=Usage and a normal user
- runs certain sreport reports.
- * Changes in Slurm 14.11.3
- ==========================
- -- Prevent vestigial job record when canceling a pending job array record.
- -- Fixed squeue core dump.
- -- Fix job array hash table bug, could result in slurmctld infinite loop or
- invalid memory reference.
- -- In srun honor ntasks_per_node before looking at cpu count when the user
- doesn't request a number of tasks.
- -- Fix ghost job when submitting job after all jobids are exhausted.
- -- MySQL - Enhanced coordinator security checks.
- -- Fix for task/affinity if an admin configures a node for having threads
- but then sets CPUs to only represent the number of cores on the node.
- -- Make it so previous versions of salloc/srun work with newer versions
- of Slurm daemons.
- -- Avoid delay on commit for PMI rank 0 to improve performance with some
- MPI implementations.
- -- auth/munge - Correct logic to read old format AccountingStoragePass.
- -- Reset node "RESERVED" state as appropriate when deleting a maintenance
- reservation.
- -- Prevent a job manually suspended from being resumed by gang scheduler once
- free resources are available.
- -- Prevent invalid job array task ID value if a task is started using gang
- scheduling.
- -- Fixes for clean build on FreeBSD.
- -- Fix documentation bugs in slurm.conf.5. DenyAccount should be DenyAccounts.
- -- For backward compatibility with older versions of OMPI not compiled
- with --with-pmi restore the SLURM_STEP_RESV_PORTS in the job environment.
- -- Update the html documentation describing the integration with openmpi.
- -- Fix sacct when searching by nodelist.
- -- Fix cosmetic info statements when dealing with a job array task instead of
- a normal job.
- -- Fix segfault with job arrays.
- -- Correct the sbatch pbs parser to process -j.
- -- BGQ - Put print statement under a DebugFlag. This was just an oversight.
- -- BLUEGENE - Remove check that would erroneously remove the CONFIGURING
- flag from a job while the job is waiting for a block to boot.
- -- Fix segfault in slurmstepd when job exceeded memory limit.
- -- Fix race condition that could start a job that is dependent upon a job array
- before all tasks of that job array complete.
- -- PMI2 race condition fix.
- * Changes in Slurm 14.11.2
- ==========================
- -- Fix Centos5 compile errors.
- -- Fix issue with association hash not getting the correct index which
- could result in seg fault.
- -- Fix salloc/sbatch -B segfault.
- -- Avoid huge malloc if GRES configured with "Type" and huge "Count".
- -- Fix jobs from starting in overlapping reservations that won't finish before
- a "maint" reservation begins.
- -- When node gets drained while in state mixed display its status as draining
- in sinfo output.
- -- Allow priority/multifactor to work with sched/wiki(2) if all priorities
- have no weight. This allows for association and QOS decay limits to work.
- -- Fix "squeue --start" to override SQUEUE_FORMAT env variable.
- -- Fix scancel to be able to cancel multiple jobs that are space delimited.
- -- Log Cray MPI job calling exit() without mpi_fini(), but do not treat it as
- a fatal error. This partially reverts logic added in version 14.03.9.
- -- sview - Fix displaying of suspended steps elapsed times.
- -- Increase number of messages that get cached before throwing them away
- when the DBD is down.
- -- Fix jobs from starting in overlapping reservations that won't finish before
- a "maint" reservation begins.
- -- Restore GRES functionality with select/linear plugin. It was broken in
- version 14.03.10.
- -- Fix bug with GRES having multiple types that can cause slurmctld abort.
- -- Fix squeue issue with not recognizing "localhost" in --nodelist option.
- -- Make sure the bitstrings for a partition's Allow/DenyQOS are up to date
- when running from cache.
- -- Add smap support for job arrays and larger job ID values.
- -- Fix possible race condition when attempting to use QOS on a system running
- accounting_storage/filetxt.
- -- Fix issue with accounting_storage/filetxt and job arrays not being printed
- correctly.
- -- In proctrack/linuxproc and proctrack/pgid, check the result of strtol()
- for error condition rather than errno, which might have a vestigial error
- code.
- -- Improve information recording for jobs deferred due to advanced
- reservation.
- -- Exports eio_new_initial_obj to the plugins and initialize kvs_seq on
- mpi/pmi2 setup to support launching.
- * Changes in Slurm 14.11.1
- ==========================
- -- Get libs correct when doing the xtree/xhash make check.
- -- Update xhash/tree make check to work correctly with current code.
- -- Remove the reference 'experimental' for the jobacct_gather/cgroup
- plugin.
- -- Add QOS manipulation examples to the qos.html documentation page.
- -- If 'squeue -w node_name' specifies an unknown host name print
- an error message and return 1.
- -- Fix race condition in job_submit plugin logic that could cause slurmctld to
- deadlock.
- -- Job wait reason of "ReqNodeNotAvail" expanded to identify unavailable nodes
- (e.g. "ReqNodeNotAvail(Unavailable:tux[3-6])").
- * Changes in Slurm 14.11.0
- ==========================
- -- ALPS - Fix issue with core_spec warning.
- -- Allow multiple partitions to be specified in sinfo -p.
- -- Install the service files in /usr/lib/systemd/system.
- -- MYSQL - Add id_array_job and id_resv keys to $CLUSTER_job_table. THIS
- COULD TAKE A WHILE TO CREATE THE KEYS SO BE PATIENT.
- -- CRAY - Resize bitmaps on a restart and find we have more blades
- than before.
- -- Add new eio API function for removing unused connections.
- -- ALPS - Fix issue where batch allocations weren't correctly confirmed or
- released.
- -- Define DEFAULT_MAX_TASKS_PER_NODE based on MAX_TASKS_PER_NODE from
- slurm.h as per documentation.
- -- Update the FAQ about relocating slurmctld.
- -- In the memory cgroup enable memory.use_hierarchy in the cgroup root.
- -- Export eio.c functions for use by MPI/PMI2.
- -- Add SLURM_CLUSTER_NAME to job environment.
- * Changes in Slurm 14.11.0rc3
- =============================
- -- Allow envs to override autotools binaries in autogen.sh
- -- Added system services files.
- -- If the job pends with DependencyNeverSatisfied keep it pending even after
- the job which it was depending upon was cleaned.
- -- Let operators (in addition to user root and SlurmUser) see job script for
- other user's jobs.
- -- Perl API modified to return node state of MIXED rather than ALLOCATED if
- only some CPUs allocated.
- -- Double Munge connect retry timeout from 1 to 2 seconds.
- -- sview - Remove unneeded code that was resolved globally in commit
- 98e24b0dedc.
- -- Collect and report the accounting of the batch step and its children.
- -- Add configure checks for faccessat and eaccess, and make use of one of
- them if available.
- -- Make configure --enable-developer also set --enable-debug
- -- Introduce a SchedulerParameters variable kill_invalid_depend, if set
- then jobs pending with invalid dependency are going to be terminated.
- -- Move spank_user_task() call in slurmstepd after the task_g_pre_launch()
- so that the task affinity information is available to spank.
- -- Make /etc/init.d/slurm script return value 3 when the daemon is
- not running. This is required by Linux Standard Base Core
- Specification 3.1
- * Changes in Slurm 14.11.0rc2
- =============================
- -- Logs for jobs which are explicitly requeued will say so rather than saying
- that a node in their allocation failed.
- -- Updated the documentation about the remote licenses served by
- the Slurm database.
- -- Insure that slurm_spank_exit() is only called once from srun.
- -- Change the signature of net_set_low_water() to use 4 bytes instead of 8.
- -- Export working_cluster_rec in libslurmdb.so as well as move some function
- definitions needed for drmaa.
- -- If using cons_res or serial cause a fatal in the plugin instead of causing
- the SelectTypeParameters to magically set to CR_CPU.
- -- Enhance task/affinity auto binding to consider tasks * cpus-per-task.
- -- Fix regression in the priority/multifactor which would cause memory corruption.
- Issue is only in rc1.
- -- Add PrivateData value of "cloud". If set, powered down nodes in the cloud
- will be visible.
- -- Sched/backfill - Eliminate clearing start_time of running jobs.
- -- Fix various backwards compatibility issues.
- -- If failed to launch a batch job, requeue it in hold.
- * Changes in Slurm 14.11.0rc1
- =============================
- -- When using cgroup name the batch step as step_batch instead of
- batch_4294967294
- -- Changed LEVEL_BASED priority to be "Fair_Tree"
- -- Port to NetBSD.
- -- BGQ - Add cnode based reservations.
- -- Alongside totalview_jobid implement totalview_stepid available
- to sattach.
- -- Add ability to include other files in slurm.conf based upon the ClusterName.
- -- Update strlcpy to latest upstream version.
- -- Add reservation information in the sacct and sreport output.
- -- Add job priority calculation check for overflow and fix memory leak.
- -- Add SchedulerParameters option of pack_serial_at_end to put serial jobs at
- the end of the available nodes rather than using a best fit algorithm.
- -- Allow regular users to view default sinfo output when
- privatedata=reservations is set.
- -- PrivateData=reservation modified to permit users to view the reservations
- which they have access to (rather than preventing them from seeing ANY
- reservation).
- -- job_submit/lua: Fix job_desc set field logic
- * Changes in Slurm 14.11.0pre5
- ==============================
- -- Fix sbatch --export=ALL, it was treated by srun as a request to explicitly
- export only the environment variable named "ALL".
- -- Improve scheduling of jobs in reservations that overlap other reservations.
- -- Modify sgather to make global file systems easier to configure.
- -- Added sacctmgr reconfig to reread the slurmdbd.conf in the slurmdbd.
- -- Modify scontrol job operations to accept comma delimited list of job IDs.
- Applies to job update, hold, release, suspend, resume, requeue, and
- requeuehold operations.
- -- Refactor job_submit/lua interface. LUA FUNCTIONS NEED TO CHANGE! The
- lua script no longer needs to explicitly load meta-tables, but information
- is available directly using names slurm.reservations, slurm.jobs,
- slurm.log_info, etc. Also, the job_submit.lua script is reloaded when
- updated without restarting the slurmctld daemon.
- -- Allow users to specify --resv_ports to have value 0.
- -- Cray MPMD (Multiple-Program Multiple-Data) support completed.
- -- Added ability for "scontrol update" to reference jobs by JobName (and
- filtered optionally by UserID).
- -- Add support for an advanced reservation start time that remains constant
- relative to the current time. This can be used to prevent the starting of
- longer running jobs on select nodes for maintenance purpose. See the
- reservation flag "TIME_FLOAT" for more information.
- -- Enlarge the jobid field to 18 characters in squeue output.
- -- Added "scontrol write config" option to save a copy of the current
- configuration in a file containing a time stamp.
- -- Eliminate native Cray specific port management. Native Cray systems must
- now use the MpiParams configuration parameter to specify ports to be used
- for communications. When upgrading Native Cray systems from version 14.03,
- all running jobs should be killed and the switch_cray_state file (in
- SaveStateLocation of the nodes where the slurmctld daemon runs) must be
- explicitly deleted.
- * Changes in Slurm 14.11.0pre4
- ==============================
- -- Added job array data structure and removed 64k array size restriction.
- -- Added SchedulerParameters options of bf_max_job_array_resv to control how
- many tasks of a job array should have resources reserved for them.
- -- Added more validity checking of incoming job submit requests.
- -- Added srun --export option to set/export specific environment variables.
- -- Scontrol modified to print separate error messages for job arrays with
- different exit codes on the different tasks of the job array. Applies to
- job suspend and resume operations.
- -- Fix race condition in CPU frequency set with job preemption.
- -- Always call select plugin on step termination, even if the job is also
- complete.
- -- Srun executable names beginning with "." will be resolved based upon the
- working directory and path on the compute node rather than the submit node.
- -- Add node state string suffix of "$" to identify nodes in maintenance
- reservation or scheduled for reboot. This applies to scontrol, sinfo,
- and sview commands.
- -- Enable scontrol to clear a node's scheduled reboot by setting its state
- to "RESUME".
- -- As per sbatch and srun documentation when the --signal option is used
- signal only the steps unless, in the case of a batch job, B is
- specified, in which case signal only the batch script.
- -- Modify AuthInfo configuration parameter to accept credential lifetime
- option.
- -- Modify crypto/munge plugin to use socket and timeout specified in AuthInfo.
- -- If we have a state for a step on completion put that in the database
- instead of guessing off the exit_code.
- -- Added squeue -P/--priority option that can be used to display pending jobs
- in the same order as used by the Slurm scheduler even if jobs are submitted
- to multiple partitions (job is reported once per usable partition).
- -- Improve the pending reason description for various QOS limits. For each
- QOS limit that causes a job to be pending print its specific reason.
- For example if job pends because of GrpCpus the squeue command will
- print QOSGrpCpuLimit as pending reason.
- -- sched/backfill - Set expected start time of job submitted to multiple
- partitions to the earliest start time on any of the partitions.
- -- Introduce a MAX_BATCH_REQUEUE define that indicates how many times a job
- can be requeued upon prolog failure. When the number is reached the job
- is put on hold with reason JobHoldMaxRequeue.
- -- Add sbatch job array option to limit the number of simultaneously running
- tasks from a job array (e.g. "--array=0-15%4").
- -- Implemented a new QOS limit MinCPUs. Users running under a QOS must
- request a minimum number of CPUs which is at least MinCPUs otherwise
- their job will pend.
- -- Introduced a new pending reason WAIT_QOS_MIN_CPUS to reflect the new QOS
- limit.
- -- Job array dependency based upon state is now dependent upon the state of
- the array as a whole (e.g. afterok requires ALL tasks to complete
- successfully, afternotok is true if ANY task does not complete successfully,
- and after requires all tasks to at least be started).
- -- The srun -u/--unbuffered options set the stdout of the task launched
- by srun to be line buffered.
- -- The srun options -l/--label and -u/--unbuffered can be specified together.
- This limitation has been removed.
- -- Provide sacct display of gres accounting information per job.
- -- Change the node status size from uint16_t to uint32_t.
- * Changes in Slurm 14.11.0pre3
- ==============================
- -- Move xcpuinfo.[c|h] to the slurmd since it isn't needed anywhere else
- and will avoid the need for all the daemons to link to libhwloc.
- -- Add memory test to job_submit/partition plugin.
- -- Added new internal Slurm functions xmalloc_nz() and xrealloc_nz(), which do
- not initialize the allocated memory to zero for improved performance.
- -- Modify hostlist function to dynamically allocate buffer space for improved
- performance.
- -- In the job_submit plugin: Remove all slurmctld locks prior to job_submit()
- being called for improved performance. If any slurmctld data structures are
- read or modified, add locks directly in the plugin.
- -- Added PriorityFlag LEVEL_BASED described in doc/html/level_based.shtml
- -- If Fairshare=parent is set on an account, that account's children will be
- effectively reparented for fairshare calculations to the first parent of
- their parent that is not Fairshare=parent. Limits remain the same,
- only its fairshare value is affected.
- * Changes in Slurm 14.11.0pre2
- ==============================
- -- Added AllowSpecResourcesUsage configuration parameter in slurm.conf. This
- allows jobs to use specialized resources on nodes allocated to them if the
- job designates --core-spec=0.
- -- Add new SchedulerParameters option of build_queue_timeout to throttle how
- much time can be consumed building the job queue for scheduling.
- -- Added HealthCheckNodeState option of "cycle" to cycle through the compute
- nodes over the course of HealthCheckInterval rather than running all at
- the same time.
- -- Add job "reboot" option for Linux clusters. This invokes the configured
- RebootProgram to reboot nodes allocated to a job before it begins execution.
- -- Added squeue -O/--Format option that makes all job and step fields available
- for printing.
- -- Improve database slurmctld entry speed dramatically.
- -- Add "CPUs" count to output of "scontrol show step".
- -- Add support for lua5.2
- -- scancel -b signals only the batch step neither any other step nor any
- children of the shell script.
- -- MySQL - enforce NO_ENGINE_SUBSTITUTION
- -- Added CpuFreqDef configuration parameter in slurm.conf to specify the
- default CPU frequency and governor to be set at job end.
- -- Added support for job email triggers: TIME_LIMIT, TIME_LIMIT_90 (reached
- 90% of time limit), TIME_LIMIT_80 (reached 80% of time limit), and
- TIME_LIMIT_50 (reached 50% of time limit). Applies to salloc, sbatch and
- srun commands.
- -- In slurm.conf add the parameter SrunPortRange=min-max. If this is configured
- then srun will use its dynamic ports only from the configured range.
- -- Make debug_flags 64 bit to handle more flags.
- * Changes in Slurm 14.11.0pre1
- ==============================
- -- Modify etc/cgroup.release_common.example to specify the full path to the
- scontrol command. Also find cgroup mount point by reading cgroup.conf file.
- -- Improve qsub wrapper support for passing environment variables.
- -- Modify sdiag to report Slurm RPC traffic by user, type, count and time
- consumed.
- -- In select plugins, stop triggering extra logging based upon the debug flag
- CPU_Bind and use SelectType instead.
- -- Added SchedulerParameters options of bf_yield_interval and bf_yield_sleep
- to control how frequently and for how long the backfill scheduler will
- relinquish its locks.
- -- To support larger numbers of jobs when the StateSaveDirectory is on a
- file system that supports a limited number of files in a directory, add a
- subdirectory called "hash.#" based upon the last digit of the job ID.
- -- More gracefully handle missing batch script file. Just kill the job and do
- not drain the compute node.
- -- Add support for allocation of GRES by model type for heterogeneous systems
- (e.g. request a Kepler GPU, a Tesla GPU, or a GPU of any type).
- -- Record and enable display of nodes anticipated to be used for pending jobs.
- -- Modify squeue --start option to print the nodes expected to be used for
- pending job (in addition to expected start time, etc.).
- -- Add association hash to the assoc_mgr.
- -- Better logic to handle resized jobs when the DBD is down.
- -- Introduce MemLimitEnforce yes|no in slurm.conf. If set to "no", Slurm will
- not terminate jobs if they exceed requested memory.
- -- Add support for non-consumable generic resources for resources that are
- limited, but can be shared between jobs.
- -- Introduce 5 new Slurm errors in slurm_errno.h related to job to better
- report error conditions.
- -- Modify scontrol to print error message for each array task when updating
- the entire array.
- -- Added gres_drain and gres_used fields to node_info_t.
- -- Added PriorityParameters configuration parameter in slurm.conf.
- -- Introduce automatic job requeue policy based on exit value. See RequeueExit
- and RequeueExitHold descriptions in slurm.conf man page.
- -- Modify slurmd to cache launched job IDs for more responsive job suspend and
- gang scheduling.
- -- Permit job steps full control over cpu_bind options if specialized cores
- are included in the job allocation.
- -- Added ChosLoc configuration parameter to specify the pathname of the
- Chroot OS tool.
- -- Send SIGCONT/SIGTERM when a job is selected for preemption with GraceTime
- configured rather than waiting for GraceTime to be reached before notifying
- the job.
- -- Do not resume a job with specialized cores on a node running another job
- with specialized cores (only one can run at a time).
- -- Add specialized core count to job suspend/resume calls.
- -- task/affinity and task/cgroup - Correct specialized core task binding with
- user supplied invalid CPU mask or map.
- -- Add srun --cpu-freq options to set the CPU governor (OnDemand, Performance,
- PowerSave or UserSpace).
- -- Add support for a job step's CPU governor and/or frequency to be reset on
- suspend/resume (or gang scheduling). The default for an idle CPU will now
- be "ondemand" rather than "userspace" with the lowest frequency (to recover
- from hard slurmd failures and support gang scheduling).
- -- Added PriorityFlags option of Calculate_Running to continue recalculating
- the priority of running jobs.
- -- Replace round-robin front-end node selection with least-loaded algorithm.
- -- CRAY - Improve support of XC30 systems when running natively.
- -- Add new node configuration parameters CoreSpecCount, CPUSpecList and
- MemSpecLimit which support the reservation of resources for system use
- with Linux cgroup.
- -- Add child_forked() function to the slurm_acct_gather_profile plugin to
- close open files, leaving application with no extra open file descriptors.
- -- Cray/ALPS system - Enable backup controller to run outside of the Cray to
- accept new job submissions and most other operations on the pending jobs.
- -- Have sacct print job and task array id's for job arrays.
- -- Smooth out fanout logic
- -- If <sys/prctl.h> is present name major threads in slurmctld, for
- example backfill thread: slurmctld_bckfl, the rpc manager: slurmctld_rpcmg
- etc. The name can be seen for example using top -H.
- -- sview - Better job_array support.
- -- Provide more precise error message when job allocation can not be satisfied
- (e.g. memory, disk, cpu count, etc. rather than just "node configuration
- not available").
- -- Create a new DebugFlags named TraceJobs in slurm.conf to print detailed
- information about jobs in slurmctld. The information includes job ids, state
- and node count.
- -- When a job dependency can never be satisfied do not cancel the job but keep
- pending with reason WAIT_DEP_INVALID (DependencyNeverSatisfied).
- * Changes in Slurm 14.03.12
- ===========================
- -- Make it so previous versions of salloc/srun work with newer versions
- of Slurm daemons.
- -- PMI2 race condition fix.
- -- Avoid delay on commit for PMI rank 0 to improve performance with some
- MPI implementations.
- -- Correct the sbatch pbs parser to process -j.
- -- Squeue modified to not merge tasks of a job array if their wait reasons
- differ.
- -- Use the slurm_getpwuid_r wrapper of getpwuid_r to handle possible
- interrupts.
- -- Allow --ignore-pbs to take effect when read as an #SBATCH argument.
- -- Do not launch step if job killed while the prolog was running.
- * Changes in Slurm 14.03.11
- ===========================
- -- ALPS - Fix depth for Memory items in BASIL with CLE 5.2
- (changed starting in 5.2.3).
- -- ALPS - Fix issue when tracking memory on a PerNode basis instead of
- PerCPU.
- -- Modify assoc_mgr_fill_in_qos() to allow for a flag to know if the QOS read
- lock was locked outside of the function or not.
- -- Give even better estimates on pending node count if no node count
- is requested.
- -- Fix jobcomp/mysql plugin for MariaDB 10+/Mysql 5.6+ to work with reserved
- word "partition".
- -- If requested (scontrol reboot node_name) reboot a node even if it has
- a maintenance reservation that is not active yet.
- -- Fix issue where exclusive allocations wouldn't lay tasks out correctly
- with CR_PACK_NODES.
- -- Do not requeue a batch job from slurmd daemon if it is killed while in
- the process of being launched (a race condition introduced in v14.03.9).
- -- Do not let srun overwrite SLURM_JOB_NUM_NODES if already in an allocation.
- -- Prevent a job's end_time from being too small after a basil reservation
- error.
- -- Fix sbatch --ntasks-per-core option from setting invalid
- SLURM_NTASKS_PER_CORE environment value.
- -- Prevent scancel abort when no job satisfies filter options.
- -- ALPS - Fix --ntasks-per-core option on multiple nodes.
- -- Double max string that Slurm can pack from 16MB to 32MB to support
- larger MPI2 configurations.
- -- Fix Centos5 compile issues.
- -- Log Cray MPI job calling exit() without mpi_fini(), but do not treat it as
- a fatal error. This partially reverts logic added in version 14.03.9.
- -- sview - Fix displaying of suspended steps elapsed times.
- -- Increase number of messages that get cached before throwing them away
- when the DBD is down.
- -- Fix jobs from starting in overlapping reservations that won't finish before
- a "maint" reservation begins.
- -- Fix "squeue --start" to override SQUEUE_FORMAT env variable.
- -- Restore GRES functionality with select/linear plugin. It was broken in
- version 14.03.10.
- -- Fix possible race condition when attempting to use QOS on a system running
- accounting_storage/filetxt.
- -- Sanity check for Correct QOS on startup.
- * Changes in Slurm 14.03.10
- ===========================
- -- Fix a few sacctmgr error messages.
- -- Treat non-zero SlurmSchedLogLevel without SlurmSchedLogFile as a fatal
- error.
- -- Correct sched_config.html documentation SchedulingParameters
- should be SchedulerParameters.
- -- When using gres and cgroup ConstrainDevices set correct access
- permission for the batch step.
- -- Fix minor memory leak in jobcomp/mysql on slurmctld reconfig.
- -- Fix bug that prevented preservation of a job's GRES bitmap on slurmctld
- restart or reconfigure (bug was introduced in 14.03.5 "Clear record of a
- job's gres when requeued" and only applies when GRES mapped to specific
- files).
- -- BGQ: Fix race condition when job fails due to hardware failure and is
- requeued. Previous code could result in slurmctld abort with NULL pointer.
- -- Prevent negative job array index, which could cause slurmctld to crash.
- -- Fix issue with squeue/scontrol showing correct node_cnt when only tasks
- are specified.
- -- Check the status of the database connection before using it.
- -- ALPS - If an allocation requests -n set the BASIL -N option to the
- amount of tasks / number of node.
- -- ALPS - Don't set the env var APRUN_DEFAULT_MEMORY, it is not needed anymore.
- -- Fix potential buffer overflow.
- -- Give better estimates on pending node count if no node count is requested.
- -- BLUEGENE - Fix issue where requeuing jobs could cause an assert.
- * Changes in Slurm 14.03.9
- ==========================
- -- If slurmd fails to stat(2) the configuration print the string describing
- the error code.
- -- Fix for mixing core base reservations with whole node based reservations
- to avoid overlapping erroneously.
- -- BLUEGENE - Remove references to Base Partition.
- -- sview - If compiled on a non-bluegene system then used to view a BGQ fix
- to allow sview to display blocks correctly.
- -- Fix bug in update reservation. When modifying the reservation the end time
- was set incorrectly.
- -- The start time of a reservation that is in ACTIVE state cannot be modified.
- -- Update the cgroup documentation about release agent for devices.
- -- MYSQL - fix for setting up preempt list on a QOS for multiple QOS.
- -- Correct a minor error in the scancel.1 man page related to the
- --signal option.
- -- Enhance the scancel.1 man page to document the sequence of signals sent
- -- Fix slurmstepd core dump if the cgroup hierarchy is not completed
- when terminating the job.
- -- Fix hostlist_shift to be able to give correct node names on names with a
- different number of dimensions than the cluster.
- -- BLUEGENE - Fix invalid pointer in corner case in the plugin.
- -- Make sure on a reconfigure the select information for a node is preserved.
- -- Correct logic to support job GRES specification over 31 bits (problem
- in logic converting int to uint32_t).
- -- Remove logic that was creating GRES bitmap for node when not needed (only
- needed when GRES mapped to specific files).
- -- BLUEGENE - Fix sinfo -tr before it would only print idle nodes correctly.
- -- BLUEGENE - Fix for licenses_only reservation on bluegene systems.
- -- sview - Verify pointer before using strchr.
- -- -M option on tools talking to a Cray from a non-Cray fixed.
- -- CRAY - Fix rpmbuild issue for missing file slurm.conf.template.
- -- Fix race condition when dealing with removing many associations at
- different times when reservations are using the associations that are
- being deleted.
- -- When a node's state is set to power_down/power_up, then execute
- SuspendProgram/ResumeProgram even if previously executed for that node.
- -- Fix logic determining when job configuration (i.e. running node power up
- logic) is complete.
- -- Setting the state of a node in powered down state node to "resume" will
- no longer cause it to reboot, but only clear the "drain" state flag.
- -- Fix srun documentation to remove SLURM_NODELIST being equivalent as the -w
- option (since it isn't).
- -- Fix issue with --hint=nomultithread and allocations with steps running
- arbitrary layouts (test1.59).
- -- PrivateData=reservation modified to permit users to view the reservations
- which they have access to (rather than preventing them from seeing ANY
- reservation). Backport from 14.11 commit 77c2bd25c.
- -- Fix PrivateData=reservation when using associations to give privileges to
- a reservation.
- -- Better checking to see if select plugin is linear or not.
- -- Add support for time specification of "fika" (3 PM).
- -- Standardize qstat wrapper more.
- -- Provide better estimate of minimum node count for pending jobs using more
- job parameters.
- -- ALPS - Add SubAllocate to cray.conf file for those who like the way <=2.5
- did the ALPS reservation.
- -- Safer check to avoid invalid reads when shutting down the slurmctld with
- lots of jobs.
- -- Fix minor memory leak in the backfill scheduler when shutting down.
- -- Add ArchiveResvs to the output of sacctmgr show config and init the variable
- on slurmdbd startup.
- -- SLURMDBD - Only set the archive flag if purging the object
- (i.e ArchiveJobs PurgeJobs). This is only a cosmetic change.
- -- Fix for job step memory allocation logic if step requests GRES and memory
- allocations are not managed.
- -- Fix sinfo to display mixed nodes as allocated in '%F' output.
- -- Sview - Fix cpu and node counts for partitions.
- -- Ignore NO_VAL in SLURMDB_PURGE_* macros.
- -- ALPS - Don't drain nodes if epilog fails. It leaves them in drain state
- with no way to get them out.
- -- Fix issue with task/affinity oversubscribing cpus erroneously when
- using --ntasks-per-node.
- -- MYSQL - Fix load of archive files.
- -- Treat Cray MPI job calling exit() without mpi_fini() as fatal error for
- that specific task and let srun handle all timeout logic.
- -- Fix small memory leak in jobcomp/mysql.
- -- Correct tracking of licenses for suspended jobs on slurmctld reconfigure or
- restart.
- -- If failed to launch a batch job requeue it in hold.
- * Changes in Slurm 14.03.8
- ==========================
- -- Fix minor memory leak when Job doesn't have nodes on it (Meaning the job
- has finished)
- -- Fix sinfo/sview to be able to query against nodes in reserved and other
- states.
- -- Make sbatch/salloc read in (SLURM|(SBATCH|SALLOC))_HINT in order to
- handle sruns in the script that will use it.
- -- srun properly interprets a leading "." in the executable name based upon
- the working directory of the compute node rather than the submit host.
- -- Fix Lustre misspellings in hdf5 guide
- -- Fix wrong reference in slurm.conf man page to what --profile option should
- be used for AcctGatherFilesystemType.
- -- Update HDF5 document to point out the SlurmdUser is who creates the
- ProfileHDF5Dir directory as well as all its sub-directories and files.
- -- CRAY NATIVE - Remove error message for srun's ran inside an salloc that
- had --network= specified.
- -- Defer job step initiation if required GRES are in use by other steps rather
- than immediately returning an error.
- -- Deprecate --cpu_bind from sbatch and salloc. These never worked correctly
- and only caused confusion since the cpu_bind options mostly refer to a
- step we opted to only allow srun to set them in future versions.
- -- Modify sgather to work if Nodename and NodeHostname differ.
- -- Changed use of JobContainerPlugin where it should be JobContainerType.
- -- Fix for possible error if job has GRES, but the step explicitly requests a
- GRES count of zero.
- -- Make "srun --gres=none ..." work when executed without a job allocation.
- -- Change the global eio_shutdown_time to a field in eio handle.
- -- Advanced reservation fixes for heterogeneous systems, especially when
- reserving cores.
- -- If --hint=nomultithread is used in a job allocation make sure any srun's
- ran inside the allocation can read the environment correctly.
- -- If batchdir can't be made set errno correctly so the slurmctld is notified
- correctly.
- -- Remove repeated batch complete if batch directory isn't able to be made
- since the slurmd will send the same message.
- -- sacctmgr fix default format for list transactions.
- -- BLUEGENE - Fix backfill issue with backfilling jobs on blocks already
- reserved for higher priority jobs.
- -- When creating job arrays the job specification files for each element
- are hard links to the first element specification files. If the controller
- fails to make the links the files are copied instead.
- -- Fix error handling for job array create failure due to inability to copy
- job files (script and environment).
- -- Added patch in the contribs directory for integrating make version 4.0 with
- Slurm and renamed the previous patch "make-3.81.slurm.patch".
- -- Don't wait for an update message from the DBD to finish before sending rc
- message back. In slow systems with many associations this could speed
- responsiveness in sacctmgr after adding associations.
- -- Eliminate race condition in enforcement of MaxJobCount limit for job arrays.
- -- Fix anomaly allocating cores for GRES with specific device/CPU mapping.
- -- cons_res - When requesting exclusive access make sure we set the number
- of cpus in the job_resources_t structure so as nodes finish the correct
- cpu count is displayed in the user tools.
- -- If the job_submit plugin calls take longer than 1 second to run, print a
- warning.
- -- Make sure transfer_s_p_options transfers all the portions of the
- s_p_options_t struct.
- -- Correct the srun man page, the SLURM_CPU_BIND_VERBOSE, SLURM_CPU_BIND_TYPE,
- SLURM_CPU_BIND_LIST environment variables are set only when task/affinity
- plugin is configured.
- -- sacct - Initialize variables correctly to avoid incorrect structure
- reference.
- -- Performance adjustment to avoid calling a function multiple times when it
- only needs to be called once.
- -- Give more correct waiting reason if job is waiting on association/QOS
- MaxNode limit.
- -- DB - When sending lft updates to the slurmctld only send non-deleted lfts.
- -- BLUEGENE - Fix documentation on how to build a reservation less than
- a midplane.
- -- If Slurmctld fails to read the job environment consider it an error
- and abort the job.
- -- Add the name of the node a job is running on to the message printed by
- slurmstepd when terminating a job.
- -- Remove unsupported options from sacctmgr help and the dump function.
- -- Update sacctmgr man page removing reference to obsolete parameter
- MaxProcSecondsPerJob.
- -- Added more validity checking of incoming job submit requests.
- * Changes in Slurm 14.03.7
- ==========================
- -- Correct typos in man pages.
- -- Add note to MaxNodesPerUser and multiple jobs running on the same node
- counting as multiple nodes.
- -- PerlAPI - fix renamed call from slurm_api_set_conf_file to
- slurm_conf_reinit.
- -- Fix gres race condition that could result in job deallocation error message.
- -- Correct NumCPUs count for jobs with --exclusive option.
- -- When creating reservation with CoreCnt, check that Slurm uses
- SelectType=select/cons_res, otherwise don't send the request to slurmctld
- and return an error.
- -- Save the state of scheduled node reboots so they will not be lost should the
- slurmctld restart.
- -- In select/cons_res plugin - Ensure the node count does not exceed the task
- count.
- -- switch/nrt - Do not explicitly unload windows for a job on termination,
- only unload its table (which automatically unloads its windows).
- -- When HealthCheckNodeState is configured as IDLE don't run the
- HealthCheckProgram for nodes in any other states than IDLE.
- -- Remove all slurmctld locks prior to job_submit() being called in plugins.
- If any slurmctld data structures are read or modified, add locks directly
- in the plugin.
- -- Minor sanity check to verify the string sent in isn't NULL when using
- bit_unfmt.
- -- CRAY NATIVE - Fix issue on heavy systems to only run the NHC once per
- job/step completion.
- -- Remove unneeded step cleanup for pending steps.
- -- Fix issue where if a batch job was manually requeued the batch step
- information wasn't stored in accounting.
- -- When job is released from a requeue hold state clean up its previous
- exit code.
- -- Correct the srun man page about how the output from the user application
- is sent to srun.
- -- Increase the timeout of the main thread while waiting for the i/o thread.
- Allow up to 180 seconds for the i/o thread to complete.
- -- When using sacct -c to read the job completion data compute the correct
- job elapsed time.
- -- Perl package: Define some missing node states.
- -- When using AccountingStorageType=accounting_storage/mysql zero out the
- database index for the array elements avoiding duplicate database values.
- -- Reword the explanation of cputime and cputimeraw in the sacct man page.
- -- JobCompType allows "jobcomp/mysql" as valid name but the code used
- "job_comp/mysql" setting an incorrect default database.
- -- Try to load libslurm.so only when necessary.
- -- When nodes scheduled for reboot, set state to DOWN rather than FUTURE so
- they are still visible to sinfo. State set to IDLE after reboot completes.
- -- Apply BatchStartTimeout configuration to task launch and avoid aborting
- srun commands due to long running Prolog scripts.
- -- Fix minor memory leaks when freeing node_info_t structure.
- -- Fix various memory leaks in sview
- -- If a batch script is requeued and running steps get correct exit code/signal
- previously it was always -2.
- -- If step exitcode hasn't been set display with sacct the -2 instead
- of acting like it is a signal and exitcode.
- -- Send calculated step_rc for batch step instead of raw status as
- done for normal steps.
- -- If a job times out, set the exit code in accounting to 1 instead of the
- signal 1.
- -- Update the acct_gather.conf.5 man page removing the reference to
- InfinibandOFEDFrequency.
- -- Fix gang scheduling for jobs submitted to multiple partitions.
- -- Enable srun to submit job to multiple partitions.
- -- Update slurm.conf man page. When Epilog or Prolog fail the node state
- is set to DRAIN.
- -- Start a job in the highest priority partition possible, even if it requires
- preempting other jobs and delaying initiation, rather than using a lower
- priority partition. Previous logic would preempt lower priority jobs, but
- then might start the job in a lower priority partition and not use the
- resources released by the preempted jobs.
- -- Fix SelectTypeParameters=CR_PACK_NODES for srun making both job and step
- resource allocation.
- -- BGQ - Make it possible to pack multiple tasks on a core when not using
- the entire cnode.
- -- MYSQL - if unable to connect to mysqld close connection that was inited.
- -- DBD - when connecting make sure we wait MessageTimeout + 5 since the
- timeout when talking to the Database is the same timeout so a race
- condition could occur in the requesting client when receiving the response
- if the database is unresponsive.
- * Changes in Slurm 14.03.6
- ==========================
- -- Added examples to demonstrate the use of the sacct -T option to the man
- page.
- -- Fix for regression in 14.03.5 with sacctmgr load when Parent has "'"
- around it.
- -- Update comments in sacctmgr dump header.
- -- Fix for possible abort on change in GRES configuration.
- -- CRAY - fix modules file (backport from 14.11 commit 78fe86192b).
- -- Fix race condition which could result in requeue if batch job exit and node
- registration occur at the same time.
- -- switch/nrt - Unload job tables (in addition to windows) in user space mode.
- -- Differentiate between two identical debug messages about purging vestigial
- job scripts.
- -- If the socket used by slurmstepd to communicate with slurmd exists when
- slurmstepd attempts to create it, for example left over from a previous
- requeue or crash, delete it and recreate it.
- * Changes in Slurm 14.03.5
- ==========================
- -- If a srun runs in an exclusive allocation and doesn't use the entire
- allocation and CR_PACK_NODES is set layout tasks appropriately.
- -- Correct Shared field in job state information seen by scontrol, sview, etc.
- -- Print Slurm error string in scontrol update job and reset the Slurm errno
- before each call to the API.
- -- Fix task/cgroup to handle -mblock:fcyclic correctly
- -- Fix for core-based advanced reservations where the distribution of cores
- across nodes is not even.
- -- Fix issue where association maxnodes wouldn't be evaluated correctly if a
- QOS had a GrpNodes set.
- -- GRES fix with multiple files defined per line in gres.conf.
- -- When a job is requeued make sure accounting marks it as such.
- -- Print the state of requeued job as REQUEUED.
- -- Fix if a job's partition was taken away from it don't allow a requeue.
- -- Make sure we lock on the conf when sending slurmd's conf to the slurmstepd.
- -- Fix issue with sacctmgr 'load' not able to gracefully handle bad formatted
- file.
- -- sched/backfill: Correct job start time estimate with advanced reservations.
- -- Error message added when in proctrack/cgroup the step freezer path isn't
- able to be destroyed for debug.
- -- Added extra indexes into the database for better performance when
- deleting users.
- -- Fix issue with wckeys when tracking wckeys, but not enforcing them,
- you could get multiple '*' wckeys.
- -- Fix bug which could report to squeue the wrong partition for a running job
- that is submitted to multiple partitions.
- -- Report correct CPU count allocated to job when allocated whole node even if
- not using all CPUs.
- -- If job's constraints cannot be satisfied put it in pending state with reason
- BadConstraints and don't remove it.
- -- sched/backfill - If job started with infinite time limit, set its end_time
- one year in the future.
- -- Clear record of a job's gres when requeued.
- -- Clear QOS GrpUsedCPUs when resetting raw usage if QOS is not using any cpus.
- -- Remove log message left over from debugging.
- -- When using CR_PACK_NODES fix make --ntasks-per-node work correctly.
- -- Report correct partition associated with a step if the job is submitted to
- multiple partitions.
- -- Fix to allow removing of preemption from a QOS
- -- If the proctrack plugins fail to destroy the job container print an error
- message and avoid to loop forever, give up after 120 seconds.
- -- Make srun obey POSIX convention and increase the exit code by 128 when the
- process terminated by a signal.
- -- Sanity check for acct_gather_energy/rapl
- -- If the proctrack plugins fail to destroy the job container print an error
- message and avoid to loop forever, give up after 120 seconds.
- -- If the sbatch command specifies the option --signal=B:signum send the signal
- to the batch script only.
- -- If we cancel a task and we have no other exit code send the signal and
- exit code.
- -- Added note about InnoDB storage engine being used with MySQL.
- -- Set the job exit code when the job is signaled and set the log level to
- debug2() when processing an already completed job.
- -- Reset diagnostics time stamp when "sdiag --reset" is called.
- -- squeue and scontrol to report a job's "shared" value based upon partition
- options rather than reporting "unknown" if job submission does not use
- --exclusive or --shared option.
- -- task/cgroup - Fix cpuset binding for batch script.
- -- sched/backfill - Fix anomaly that could result in jobs being scheduled out
- of order.
- -- Expand pseudo-terminal size data structure field sizes from 8 to 16 bits.
- -- Set the job exit code when the job is signaled and set the log level to
- debug2() when processing an already completed job.
- -- Distinguish between two identical error messages.
- -- If using accounting_storage/mysql directly without a DBD fix issue with
- start of requeued jobs.
- -- If a job fails because of batch node failure and the job is requeued and an
- epilog complete message comes from that node do not process the batch step
- information since the job has already been requeued because the epilog
- script running isn't guaranteed in this situation.
- -- Change message to note a NO_VAL for return code could have come from node
- failure as well as interactive user.
- -- Modify test4.5 to only look at one partition instead of all of them.
- -- Fix sh5util -u to accept username different from the user that runs the
- command.
- -- Corrections to man pages:salloc.1 sbatch.1 srun.1 nonstop.conf.5
- slurm.conf.5.
- -- Restore srun --pty resize ability.
- -- Have sacctmgr dump cluster handle situations where users or such have
- special characters in their names like ':'
- -- Add more debugging information should the job run on the wrong node
- and should there be problems accessing the state files.
- * Changes in Slurm 14.03.4
- ==========================
- -- Fix issue where not enforcing QOS but a partition either allows or denies
- them.
- -- CRAY - Make switch/cray default when running on a Cray natively.
- -- CRAY - Make job_container/cncu default when running on a Cray natively.
- -- Disable job time limit change if its preemption is in progress.
- -- Correct logic to properly enforce job preemption GraceTime.
- -- Fix sinfo -R to print each down/drained node once, rather than once per
- partition.
- -- If a job has non-responding node, retry job step create rather than
- returning with DOWN node error.
- -- Support SLURM_CONF path which does not have "slurm.conf" as the file name.
- -- CRAY - make job_container/cncu default when running on a Cray natively
- -- Fix issue where batch cpuset wasn't looked at correctly in
- jobacct_gather/cgroup.
- -- Correct squeue's job node and CPU counts for requeued jobs.
- -- Correct SelectTypeParameters=CR_LLN with job selection of specific nodes.
- -- Only if ALL of their partitions are hidden will a job be hidden by default.
- -- Run EpilogSlurmctld for a job that is killed during slurmctld reconfiguration.
- -- Close window with srun if waiting for an allocation and while printing
- something you also get a signal which would produce deadlock.
- -- Add SelectTypeParameters option of CR_PACK_NODES to pack a job's tasks
- tightly on its allocated nodes rather than distributing them evenly across
- the allocated nodes.
- -- cpus-per-task support: Try to pack all CPUs of each tasks onto one socket.
- Previous logic could spread the tasks CPUs across multiple sockets.
- -- Add new distribution method fcyclic so when a task is using multiple cpus
- it can bind cyclically across sockets.
- -- task/affinity - When using --hint=nomultithread only bind to the first
- thread in a core.
- -- Make cgroup task layout (block | cyclic) method mirror that of
- task/affinity.
- -- If TaskProlog sets SLURM_PROLOG_CPU_MASK reset affinity for that task
- based on the mask given.
- -- Keep supporting 'srun -N x --pty bash' for historical reasons.
- -- If EnforcePartLimits=Yes and QOS job is using can override limits, allow
- it.
- -- Fix issues if partition allows or denies account's or QOS' and either are
- not set.
- -- If a job requests a partition and it doesn't allow a QOS or account the
- job is requesting pend unless EnforcePartLimits=Yes. Before it would
- always kill the job at submit.
- -- Fix format output of scontrol command when printing node state.
- -- Improve the clean up of cgroup hierarchy when using the
- jobacct_gather/cgroup plugin.
- -- Added SchedulerParameters value of Ignore_NUMA.
- -- Fix issues with code when using automake 1.14.1
- -- select/cons_res plugin: Fix memory leak related to job preemption.
- -- After reconfig rebuild the job node counters only for jobs that have
- not finished yet, otherwise if requeued the job may enter an invalid
- COMPLETING state.
- -- Do not purge the script and environment files for completed jobs on
- slurmctld reconfiguration or restart (they might be later requeued).
- -- scontrol now accepts the option job=xxx or jobid=xxx for the requeue,
- requeuehold and release operations.
- -- task/cgroup - fix to bind batch job in the proper CPUs.
- -- Added strigger option of -N, --noheader to not print the header when
- displaying a list of triggers.
- -- Modify strigger to accept arguments to the program to execute when an
- event trigger occurs.
- -- Attempt to create duplicate event trigger now generates ESLURM_TRIGGER_DUP
- ("Duplicate event trigger").
- -- Treat special characters like %A, %s etc. literally in the file names
- when specified escaped e.g. sbatch -o /home/zebra\\%s will not expand
- %s as the stepid of the running job.
- -- CRAYALPS - Add better support for CLE 5.2 when running Slurm over ALPS.
- -- Test time when job_state file was written to detect multiple primary
- slurmctld daemons (e.g. both backup and primary are functioning as
- primary and there is a split brain problem).
- -- Fix scontrol to accept update jobid=# numtasks=#
- -- If the backup slurmctld assumes primary status, then do NOT purge any
- job state files (batch script and environment files) and do not re-use them.
- This may indicate that multiple primary slurmctld daemons are active (e.g.
- both backup and primary are functioning as primary and there is a split
- brain problem).
- -- Set correct error code when requeuing a completing/pending job
- -- When checking for if dependency of type afterany, afterok and afternotok
- don't clear the dependency if the job is completing.
- -- Cleanup the JOB_COMPLETING flag and eventually requeue the job when the
- last epilog completes, either slurmd epilog or slurmctld epilog, whichever
- comes last.
- -- When attempting to requeue a job distinguish the case in which the job is
- JOB_COMPLETING or already pending.
- -- When reconfiguring the controller don't restart the slurmctld epilog if it
- is already running.
- -- Email messages for job array events print now use the job ID using the
- format "#_# (#)" rather than just the internal job ID.
- -- Set the number of free licenses to be 0 if the global license count
- decreases and total is less than in use.
- -- Add DebugFlag of BackfillMap. Previously a DebugFlag value of Backfill
- logged information about what it was doing plus a map of expected resource
- use in the future. Now that very verbose resource use map is only logged
- with a DebugFlag value of BackfillMap
- -- Fix slurmstepd core dump.
- -- Modify the description of -E and -S option of sacct command as point in time
- 'before' or 'after' the database records are returned.
- -- Correct support for partition with Shared=YES configuration.
- -- If job requests --exclusive then do not use nodes which have any cores in an
- advanced reservation. Also prevents case where nodes can be shared by other
- jobs.
- -- For "scontrol --details show job" report the correct CPU_IDs when there are
- multiple threads per core (we are translating a core bitmap to CPU IDs).
- -- If DebugFlags=Protocol is configured in slurm.conf print details of the
- connection, ip address and port accepted by the controller.
- -- Fix minor memory leak when reading in incomplete node data checkpoint file.
- -- Enlarge the width specifier when printing partition SHARE to display larger
- sharing values.
- -- sinfo locks added to prevent possibly duplicate record printing for
- resources in multiple partitions.
- * Changes in Slurm 14.03.3-2
- ============================
- -- BGQ - Fix issue with uninitialized variable.
- * Changes in Slurm 14.03.3
- ==========================
- -- Correction to default batch output file name. In version 14.03.2 was using
- "slurm_<jobid>_4294967294.out" due to error in job array logic.
- -- In slurm.spec file, replace "Requires cray-MySQL-devel-enterprise" with
- "Requires mysql-devel".
- * Changes in Slurm 14.03.2
- ==========================
- -- Fix race condition if PrologFlags=Alloc,NoHold is used.
- -- Cray - Make NPC only limit running other NPC jobs on shared blades instead
- of limiting non-NPC jobs.
- -- Fix for sbatch #PBS -m (mail) option parsing.
- -- Fix job dependency bug. Jobs dependent upon multiple other jobs may start
- prematurely.
- -- Set "Reason" field for all elements of a job array on short-circuited
- scheduling for job arrays.
- -- Allow -D option of salloc/srun/sbatch to specify relative path.
- -- Added SchedulerParameter of batch_sched_delay to permit many batch jobs
- to be submitted between each scheduling attempt to reduce overhead of
- scheduling logic.
- -- Added job reason of "SchedTimeout" if the scheduler was not able to reach
- the job to attempt scheduling it.
- -- Add job's exit state and exit code to email message.
- -- scontrol hold/release accepts job name option (in addition to job ID).
- -- Handle when trying to cancel a step that hasn't started yet better.
- -- Handle Max/GrpCPU limits better
- -- Add --priority option to salloc, sbatch and srun commands.
- -- Honor partition priorities over job priorities.
- -- Fix sacct -c when using jobcomp/filetxt to read newer variables
- -- Fix segfault of sacct -c if spaces are in the variables.
- -- Release held job only with "scontrol release <jobid>" and not by resetting
- the job's priority. This is needed to support job arrays better.
- -- Correct squeue command not to merge jobs with state pending and completing
- together.
- -- Fix issue where user is requesting --acctg-freq=0 and no memory limits.
- -- Fix issue with GrpCPURunMins if a job's timelimit is altered while the job
- is running.
- -- Temporary fix for handling our typemap for the perl api with newer perl.
- -- Fix allowgroup on bad group seg fault with the controller.
- -- Handle node ranges better when dealing with accounting max node limits.
- * Changes in Slurm 14.03.1-2
- ============================
- -- Update configure to set correct version without having to run autogen.sh
- * Changes in Slurm 14.03.1
- ==========================
- -- Add support for job std_in, std_out and std_err fields in Perl API.
- -- Add "Scheduling Configuration Guide" web page.
- -- BGQ - fix check for jobinfo when it is NULL
- -- Do not check cleaning on "pending" steps.
- -- task/cgroup plugin - Fix for building on older hwloc (v1.0.2).
- -- In the PMI implementation by default don't check for duplicate keys.
- Set the SLURM_PMI_KVS_DUP_KEYS if you want the code to check for
- duplicate keys.
- -- Add job submission time to squeue.
- -- Permit user root to propagate resource limits higher than the hard limit
- slurmd has on that compute node (i.e. raise both current and maximum
- limits).
- -- Fix issue with license used count when doing an scontrol reconfig.
- -- Fix the PMI iterator to not report duplicated keys.
- -- Fix issue with sinfo when -o is used without the %P option.
- -- Rather than immediately invoking an execution of the scheduling logic on
- every event type that can enable the execution of a new job, queue its
- execution. This permits faster execution of some operations, such as
- modifying large counts of jobs, by executing the scheduling logic less
- frequently, but still in a timely fashion.
- -- If the environment variable is greater than MAX_ENV_STRLEN don't
- set it in the job env otherwise the exec() fails.
- -- Optimize scontrol hold/release logic for job arrays.
- -- Modify srun to report an exit code of zero rather than nine if some tasks
- exit with a return code of zero and others are killed with SIGKILL. Only an
- exit code of zero did this.
- -- Fix a typo in scontrol man page.
- -- Avoid slurmctld crash getting job info if detail_ptr is NULL.
- -- Fix sacctmgr add user where both defaultaccount and accounts are specified.
- -- Added SchedulerParameters option of max_sched_time to limit how long the
- main scheduling loop can execute for.
- -- Added SchedulerParameters option of sched_interval to control how frequently
- the main scheduling loop will execute.
- -- Move start time of main scheduling loop timeout after locks are acquired.
- -- Add squeue job format option of "%y" to print a job's nice value.
- -- Update scontrol update jobID logic to operate on entire job arrays.
- -- Fix PrologFlags=Alloc to run the prolog on each of the nodes in the
- allocation instead of just the first.
- -- Fix race condition if a step is starting while the slurmd is being
- restarted.
- -- Make sure a job's prolog has run before starting a step.
- -- BGQ - Fix invalid memory read when using DefaultConnType in the
- bluegene.conf
- -- Make sure we send node state to the DBD on clean start of controller.
- -- Fix some sinfo and squeue sorting anomalies due to differences in data
- types.
- -- Only send message back to slurmctld when PrologFlags=Alloc is used on a
- Cray/ALPS system, otherwise use the slurmd to wait on the prolog to gate
- the start of the step.
- -- Remove need to check PrologFlags=Alloc in slurmd since we can tell if prolog
- has run yet or not.
- -- Fix squeue to use a correct macro to check job state.
- -- BGQ - Fix incorrect logic issues if MaxBlockInError=0 in the bluegene.conf.
- -- priority/basic - Insure job priorities continue to decrease when jobs are
- submitted with the --nice option.
- -- Make the PrologFlag=Alloc work on batch scripts
- -- Make PrologFlag=NoHold (automatically sets PrologFlag=Alloc) not hold in
- salloc/srun, instead wait in the slurmd when a step hits a node and the
- prolog is still running.
- -- Added --cpu-freq=highm1 (high minus one) option.
- -- Expand StdIn/Out/Err string length output by "scontrol show job" from 128
- to 1024 bytes.
- -- squeue %F format will now print the job ID for non-array jobs.
- -- Use quicksort for all priority based job sorting, which improves performance
- significantly with large job counts.
- -- If a job has already been released from a held state ignore successive
- release requests.
- -- Fix srun/salloc/sbatch man pages for the --no-kill option.
- -- Add squeue -L/--licenses option to filter jobs by license names.
- -- Handle abort job on node on front end systems without core dumping.
- -- Fix dependency support for job arrays.
- -- When updating jobs verify the update request is not identical to
- the current settings.
- -- When sorting jobs and priorities are equal sort by job_id.
- -- Do not overwrite existing reason for node being down or drained.
- -- Requeue batch job if Munge is down and credential can not be created.
- -- Make _slurm_init_msg_engine() tolerate bug in bind() returning a busy
- ephemeral port.
- -- Don't block scheduling of entire job array if it could run in multiple
- partitions.
- -- Introduce a new debug flag Protocol to print protocol requests received
- together with the remote IP address and port.
- -- CRAY - Set up the network even when only using 1 node.
- -- CRAY - Greatly reduce the number of error messages produced from the task
- plugin and provide more information in the message.
- * Changes in Slurm 14.03.0
- ==========================
- -- job_submit/lua: Fix invalid memory reference if script returns error message
- for user.
- -- Add logic to sleep and retry if slurm.conf can't be read.
- -- Reset a node's CpuLoad value at least once each SlurmdTimeout seconds.
- -- Scheduler enhancements for reservations: When a job needs to run in
- reservation, but can not due to busy resources, then do not block all jobs
- in that partition from being scheduled, but only the jobs in that
- reservation.
- -- Export "SLURM*" environment variables from sbatch even if --export=NONE.
- -- When recovering node state if the Slurm version is 2.6 or 2.5 set the
- protocol version to be SLURM_2_5_PROTOCOL_VERSION which is the minimum
- supported version.
- -- Update the scancel man page documenting the -s option.
- -- Update sacctmgr man page documenting how to modify account's QOS.
- -- Fix for sjstat which currently does not print >1TB memory values correctly.
- -- Change xmalloc()/xfree() to malloc()/free() in hostlist.c for better
- performance.
- -- Update squeue.1 man page describing the SPECIAL_EXIT state.
- -- Added scontrol option of errnumstr to return error message given a slurm
- error number.
- -- If srun invoked with the --multi-prog option, but no task count, then use
- the task count provided in the MPMD configuration file.
- -- Prevent sview abort on some systems when adding or removing columns to the
- display for nodes, jobs, partitions, etc.
- -- Add job array hash table for improved performance.
- -- Make AccountingStorageEnforce=all not include nojobs or nosteps.
- -- Added sacctmgr mod qos set RawUsage=0.
- -- Modify hostlist functions to accept more than two numeric ranges (e.g.
- "row[1-3]rack[0-8]slot[0-63]")
- * Changes in Slurm 14.03.0rc1
- ==============================
- -- Fixed typos in srun_cr man page.
- -- Run job scheduling logic immediately when nodes enter service.
- -- Added sbatch '--parsable' option to output only the job id number and the
- cluster name separated by a semicolon. Errors will still be displayed.
- -- Added failure management "slurmctld/nonstop" plugin.
- -- Prevent jobs being killed when a checkpoint plugin is enabled or disabled.
- -- Update the documentation about SLURM_PMI_KVS_NO_DUP_KEYS environment
- variable.
- -- select/cons_res bug fix for range of node counts with --cpus-per-task
- option (e.g. "srun -N2-3 -c2 hostname" would allocate 2 CPUs on the first
- node and 0 CPUs on the second node).
- -- Change reservation flags field from 16 to 32-bits.
- -- Add reservation flag value of "FIRST_CORES".
- -- Added the idea of Resources to the database. Framework for handling
- license servers outside of Slurm.
- -- When starting the slurmctld only send past job/node state information to
- accounting if running for the first time (should speed up startup
- dramatically on systems with lots of nodes or lots of jobs).
- -- Compile and run on FreeBSD 8.4.
- -- Make job array expressions more flexible to accept multiple step counts in
- the expression (e.g. "--array=1-10:2,50-60:5,123").
- -- switch/cray - add state save/restore logic tracking allocated ports.
- -- SchedulerParameters - Replace max_job_bf with bf_max_job_start (both will
- work for now).
- -- Add SchedulerParameters options of preempt_reorder_count and
- preempt_strict_order.
- -- Make memory types in acct_gather uint64_t to handle systems with more than
- 4TB of memory on them.
- -- BGQ - --export=NONE option for srun to make it so only the SLURM_JOB_ID
- and SLURM_STEP_ID env vars are set.
- -- Munge plugins - Add sleep between retries if can't connect to socket.
- -- Added DebugFlags value of "License".
- -- Added --enable-developer which will give you -Werror when compiling.
- -- Fix for job request with GRES count of zero.
- -- Fix a potential memory leak in hostlist.
- -- Job array dependency logic: Cache results for major performance improvement.
- -- Modify squeue to support filter on job states Special_Exit and Resizing.
- -- Defer purging job record until after EpilogSlurmctld completes.
- -- Add -j option for jobid to sbcast.
- -- Fix handling RPCs from a 14.03 slurmctld to a 2.6 slurmd
- * Changes in Slurm 14.03.0pre6
- ==============================
- -- Modify slurmstepd to log messages according to the LogTimeFormat
- parameter in slurm.conf.
- -- Insure that overlapping reservations do not oversubscribe available
- licenses.
- -- Added core specialization logic to select/cons_res plugin.
- -- Added whole_node field to job_resources structure and enable gang scheduling
- for jobs with core specialization.
- -- When using FastSchedule = 1 the nodes with less than configured resources
- are no longer set DOWN, they are set to DRAIN instead.
- -- Modified 'sacctmgr show associations' command to show GrpCPURunMins
- by default.
- -- Replace the hostlist_push() function with a more efficient
- hostlist_push_host().
- -- Modify the reading of lustre file system statistics to print more
- information when debug and when io error occur.
- -- Add specialized core count field to job credential data.
- NOTE: This changes the communications protocol from other pre-releases of
- version 14.03. All programs must be cancelled and daemons upgraded from
- previous pre-releases of version 14.03. Upgrades from version 2.6 or earlier
- can take place without loss of jobs
- -- Add version number to node and front-end configuration information visible
- using the scontrol tool.
- -- Add idea of a RESERVED flag for node state so idle resources are marked
- not "idle" when in a reservation.
- -- Added core specialization plugin infrastructure.
- -- Added new job_submit/throttle plugin to control the rate at which a user
- can submit jobs.
- -- CRAY - added network performance counters option.
- -- Allow scontrol suspend/resume to accept jobid in the format jobid_taskid
- to suspend/resume array elements.
- -- In the slurmctld job record, split "shared" variable into "share_res" (share
- resource) and "whole_node" fields.
- -- Fix the format of SLURM_STEP_RESV_PORTS. It was generated incorrectly
- when using the hostlist_push_host function and input surrounded by [].
- -- Modify the srun --slurmd-debug option to accept debug string tags
- (quiet, fatal, error, info, verbose) besides the numerical values.
- -- Fix the bug where --cpu_bind=map_cpu is interpreted as mask_cpu.
- -- Update the documentation regarding the state of cpu frequencies after
- a step using --cpu-freq completes.
- -- CRAY - Fix issue when a job is requeued and nhc is still running as it is
- being scheduled to run again. This would erase the previous job info
- that was still needed to clean up the nodes from the previous job run.
- (Bug 526).
- -- Set SLURM_JOB_PARTITION environment variable for all job allocations.
- -- Set SLURM_JOB_PARTITION environment variable for Prolog program.
- -- Added SchedulerParameters option of partition_job_depth to limit scheduling
- logic depth by partition.
- -- Handle the case in which errno is not reset to 0 after calling
- getgrent_r(), which causes the controller to core dump.
- * Changes in Slurm 14.03.0pre5
- ==============================
- -- Added squeue format option of "%X" (core specialization count).
- -- Added core specialization web page (just a start for now).
- -- Added the SLURM_ARRAY_JOB_ID and SLURM_ARRAY_TASK_ID
- in epilog slurmctld environment.
- -- Fix bug in job step allocation failing due to memory limit.
- -- Modify the pbsnodes script to reflect its output on a TORQUE system.
- -- Add ability to clear a node's DRAIN flag using scontrol or sview by setting
- its state to "UNDRAIN". The node's base state (e.g. "DOWN" or "IDLE") will
- not be changed.
- -- Modify the output of 'scontrol show partition' by displaying
- DefMemPerCPU=UNLIMITED and MaxMemPerCPU=UNLIMITED when these limits are
- configured as 0.
- -- mpirun-mic - Major re-write of the command wrapper for Xeon Phi use.
- -- Add new configuration parameter of AuthInfo to specify port used by
- authentication plugin.
- -- Fixed conditional RPM compiling.
- -- Corrected slurmstepd ident name when logging to syslog.
- -- Fixed sh5util loop when there are no node-step files.
- -- Add SLURM_CLUSTER_NAME to environment variables passed to PrologSlurmctld,
- Prolog, EpilogSlurmctld, and Epilog
- -- Add the idea of running a prolog right when an allocation happens
- instead of when running on the node for the first time.
- -- If user runs 'scontrol reconfig' but hostnames or the host count changes
- the slurmctld throws a fatal error.
- -- gres.conf - Add "NodeName" specification so that a single gres.conf file
- can be used for a heterogeneous cluster.
- -- Add flag to accounting RPC to indicate if job data is packed or not.
- -- After all srun tasks have terminated on a node close the stdout/stderr
- channel with the slurmstepd on that node.
- -- In case of i/o error with slurmstepd log an error message and abort the
- job.
- -- Add --test-only option to sbatch command to validate the script and options.
- The response includes expected start time and resources to be allocated.
- * Changes in Slurm 14.03.0pre4
- ==============================
- -- Remove the ThreadID documentation from slurm.conf. This functionality has
- been obsoleted by the LogTimeFormat.
- -- Sched plugins - rename global and plugin functions names for consistency
- with other plugin types.
- -- BGQ - Added RebootQOSList option to bluegene.conf to allow an implicit
- reboot of a block if only jobs in the list are running on it when cnodes
- go into a failure state.
- -- Correct task count of pending job steps.
- -- Improve limit enforcement for jobs, set RLIMIT_RSS, RLIMIT_AS and/or
- RLIMIT_DATA to enforce memory limit.
- -- Pending job steps will have step_id of INFINITE rather than NO_VAL and
- will be reported as "TBD" by scontrol and squeue commands.
- -- Add logic so PMI_Abort or PMI2_Abort can propagate an exit code.
- -- Added SlurmdPlugstack configuration parameter.
- -- Added PriorityFlag DEPTH_OBLIVIOUS to have the depth of an association
- not affect its priority.
- -- Multi-thread the sinfo command (one thread per partition).
- -- Added sgather tool to gather files from a job's compute nodes into a
- central location.
- -- Added configuration parameter FairShareDampeningFactor to offer a greater
- priority range based upon utilization.
- -- Change MaxArraySize and job's array_task_id from 16-bit to 32-bit field.
- Additional Slurm enhancements are required to support larger job arrays.
- -- Added -S/--core-spec option to salloc, sbatch and srun commands to reserve
- specialized cores for system use. Modify scontrol and sview to get/set
- the new field. No enforcement exists yet for these new options.
- struct job_info / slurm_job_info_t: Added core_spec
- struct job_descriptor / job_desc_msg_t: Added core_spec
- * Changes in Slurm 14.03.0pre3
- ==============================
- -- Do not set SLURM_NODEID environment variable on front-end systems.
- -- Convert bitmap functions to use int32_t instead of int in data structures
- and function arguments. This is to reliably enable use of bitmaps containing
- up to 4 billion elements. Several data structures containing index values
- were also changed from data type int to int32_t:
- - Struct job_info / slurm_job_info_t: Changed exc_node_inx, node_inx, and
- req_node_inx from type int to type int32_t
- - job_step_info_t: Changed node_inx from type int to type int32_t
- - Struct partition_info / partition_info_t: Changed node_inx from type int
- to type int32_t
- - block_job_info_t: Changed cnode_inx from type int to type int32_t
- - block_info_t: Changed ionode_inx and mp_inx from type int to type int32_t
- - Struct reserve_info / reserve_info_t: Changed node_inx from type int to
- type int32_t
- -- Modify qsub wrapper output to match torque command output, just print the
- job ID rather than "Submitted batch job #"
- -- Change Slurm error string for ESLURM_MISSING_TIME_LIMIT from
- "Missing time limit" to
- "Time limit specification required, but not provided"
- -- Change salloc job_allocate error message header from
- "Failed to allocate resources" to
- "Job submit/allocate failed"
- -- Modify slurmctld message retry logic to support Cray cold-standby SDB.
- * Changes in Slurm 14.03.0pre2
- ==============================
- -- Added "JobAcctGatherParams" configuration parameter. Value of "NoShare"
- disables accounting for shared memory.
- -- Added fields to "scontrol show job" output: boards_per_node,
- sockets_per_board, ntasks_per_node, ntasks_per_board, ntasks_per_socket,
- ntasks_per_core, and nice.
- -- Add squeue output format options for job command and working directory
- (%o and %Z respectively).
- -- Add stdin/out/err to sview job output.
- -- Add new job_state of JOB_BOOT_FAIL for job terminations due to failure to
- boot its allocated nodes or BlueGene block.
- -- CRAY - Add SelectTypeParameters NHC_NO_STEPS and NHC_NO which will disable
- the node health check script for steps and allocations respectively.
- -- Reservation with CoreCnt: Avoid possible invalid memory reference.
- -- Add new error code for attempt to create a reservation with duplicate name.
- -- Validate that a hostlist file contains text (i.e. not a binary).
- -- switch/generic - propagate switch information from srun down to slurmd and
- slurmstepd.
- -- CRAY - Do not package Slurm's libpmi or libpmi2 libraries. The Cray version
- of those libraries must be used.
- -- Added a new option to the scontrol command to view licenses that are
- configured, in use and available. 'scontrol show licenses'.
- -- MySQL - Made Slurm compatible with 5.6
- * Changes in Slurm 14.03.0pre1
- ==============================
- -- sview - improve scalability
- -- Add task pointer to the task_post_term() function in task plugins. The
- terminating task's PID is available in task->pid.
- -- Move select/cray to select/alps
- -- Defer sending SIGKILL signal to processes while core dump in progress.
- -- Added JobContainerPlugin configuration parameter and plugin infrastructure.
- -- Added partition configuration parameters AllowAccounts, AllowQOS,
- DenyAccounts and DenyQOS.
- -- The rpmbuild option for a cray system with ALPS has changed from
- %_with_cray to %_with_cray_alps.
- -- The log file timestamp format can now be selected at runtime via the
- LogTimeFormat configuration option. See the slurm.conf and slurmdbd.conf
- man pages for details.
- -- Added switch/generic plugin to convey a job's network topology.
- -- BLUEGENE - If block is in 'D' state or has more cnodes in error than
- MaxBlockInError set the job wait reason appropriately.
- -- API use: Generate an error return rather than fatal error and exit if the
- configuration file is absent or invalid. This will permit Slurm APIs to be
- more reliably used by other programs.
- -- Add support for load-based scheduling, allocate jobs to nodes with the
- largest number of available CPUs. Added SchedulingParameters parameter of
- "CR_LLN" and partition parameter of "LLN=yes|no".
- -- Added job_info() and step_info() functions to the gres plugins to extract
- plugin specific fields from the job's or step's GRES data structure.
- -- Added sbatch --signal option of "B:" to signal the batch shell rather than
- only the spawned job steps.
- -- Added sinfo and squeue format option of "%all" to print all fields available
- for the data type with a vertical bar separating each field.
- -- Add mechanism for job_submit plugin to generate error message for srun,
- salloc or sbatch to stderr. New argument added to job_submit function in
- the plugin.
- -- Add StdIn, StdOut, and StdErr paths to job information dumped with
- "scontrol show job".
- -- Permit Slurm administrator to submit a batch job as any user.
- -- Set a job's RLIMIT_AS limit based upon its memory limit and VsizeFactor
- configuration value.
- -- Remove Postgres plugins
- -- Make jobacct_gather/cgroup work correctly and also make all jobacct_gather
- plugins more maintainable.
- -- Proctrack/pgid - Add support for proctrack_p_plugin_get_pids() function.
- -- Sched/backfill - Change default max_job_bf parameter from 50 to 100.
- -- Added -I|--item-extract option to sh5util to extract data item from series.
- * Changes in Slurm 2.6.10
- =========================
- -- Switch/nrt - On switch resource allocation failure, free partial allocation.
- -- Switch/nrt - Properly track usage of CAU and RDMA resources with multiple
- tasks per compute node.
- -- Fix issue where user is requesting --acctg-freq=0 and no memory limits.
- -- BGQ - Temp fix issue where job could be left on job_list after it finished.
- -- BGQ - Fix issue where limits were checked on midplane counts instead of
- cnode counts.
- -- BGQ - Move code to only start job on a block after limits are checked.
- -- Handle node ranges better when dealing with accounting max node limits.
- -- Fix perlapi to compile correctly with perl 5.18
- -- BGQ - Fix issue with uninitialized variable.
- -- Correct sinfo --sort fields to match documentation: E => Reason,
- H -> Reason Time (new), R -> Partition Name, u/U -> Reason user (new)
- -- If an invalid assoc_ptr comes in don't use the id to verify it.
- -- Sched/backfill modified to avoid using nodes in completing state.
- -- Correct support for job --profile=none option and related documentation.
- -- Properly enforce job --requeue and --norequeue options.
- -- If a job --mem-per-cpu limit exceeds the partition or system limit, then
- scale the job's memory limit and CPUs per task to satisfy the limit.
- -- Correct logic to support Power7 processor with 1 or 2 threads per core
- (CPU IDs are not consecutive).
- * Changes in Slurm 2.6.9
- ========================
- -- Fix sinfo to work correctly with draining/mixed nodes as well as filtering
- on Mixed state.
- -- Fix sacctmgr update user with no "where" condition.
- -- Fix logic bugs for SchedulerParameters option of max_rpc_cnt.
- * Changes in Slurm 2.6.8
- ========================
- -- Add support for Torque/PBS job array options and environment variables.
- -- CRAY/ALPS - Add support for CLE52
- -- Fix issue where jobs still pending after a reservation would remain
- in waiting reason ReqNodeNotAvail.
- -- Update last_job_update when a job's state_reason was modified.
- -- Free job_ptr->state_desc where ever state_reason is set.
- -- Fixed sacct.1 and srun.1 manual pages which contain a hyphen where
- a minus sign for options was intended.
- -- sinfo - Make sure if partition name is long and the default the last char
- doesn't get chopped off.
- -- task/affinity - Protect against zero divide when simulating more hardware
- than you really have.
- -- NRT - Fix issue with 1 node jobs. It turns out the network does need to
- be setup for 1 node jobs.
- -- Fix recovery of job dependency on task of job array when slurmctld restarts.
- -- mysql - Fix invalid memory reference.
- -- Lock the /cgroup/freezer subsystem when creating files for tracking processes.
- -- Fix preempt/partition_prio to avoid preempting jobs in partitions with
- PreemptMode=OFF
- -- launch/poe - Implicitly set --network in job step create request as needed.
- -- Permit multiple batch job submissions to be made for each run of the
- scheduler logic if the job submissions occur at the nearly same time.
- -- Fix issue where associations weren't correct if backup takes control and
- new associations were added since it was started.
- -- Fix race condition in corner case with backup slurmctld.
- -- With the backup slurmctld make sure we reinit beginning values in the
- slurmdbd plugin.
- -- Fix sinfo to work correctly with draining/mixed nodes.
- -- MySQL - Fix it so a lock isn't held unnecessarily.
- -- Added new SchedulerParameters option of max_rpc_cnt when too many RPCs
- are active.
- -- BGQ - Fix deny_pass to work correctly.
- -- BGQ - Fix sub block steps using a block when the block has passthrough's
- in it.
- * Changes in Slurm 2.6.7
- ========================
- -- Properly enforce a job's cpus-per-task option when a job's allocation is
- constrained on some nodes by the mem-per-cpu option.
- -- Correct the slurm.conf man pages and checkpoint_blcr.html page
- describing that jobs must be drained from cluster before deploying
- any checkpoint plugin. Corrected in version 14.03.
- -- Fix issue where if using munge and munge wasn't running and a slurmd
- needed to forward a message, the slurmd would core dump.
- -- Update srun.1 man page documenting the PMI2 support.
- -- Fix slurmctld core dump when a job gets its QOS updated but there
- is not a corresponding association.
- -- If a job requires specific nodes and can not run due to those nodes being
- busy, the main scheduling loop will block those specific nodes rather than
- the entire queue/partition.
- -- Fix minor memory leak when updating a job's name.
- -- Fix minor memory leak when updating a reservation on a partition using "ALL"
- nodes.
- -- Fix minor memory leak when adding a reservation with a nodelist and core
- count.
- -- Update sacct man page description of job states.
- -- BGQ - Fix minor memory leak when selecting blocks that can't immediately be
- placed.
- -- Fixed minor memory leak in backfill scheduler.
- -- MYSQL - Fixed memory leak when querying clusters.
- -- MYSQL - Fix when updating QOS on an association.
- -- NRT - Fix to supply correct error messages to poe/pmd when a launch fails.
- -- Add SLURM_STEP_ID to Prolog environment.
- -- Add support for SchedulerParameters value of bf_max_job_start that limits
- the total number of jobs that can be started in a single iteration of the
- backfill scheduler.
- -- Don't print negative number when dealing with large memory sizes with
- sacct.
- -- Fix sinfo output so that host in state allocated and mixed will not be
- merged together.
- -- GRES: Avoid crash if GRES configuration is inconsistent.
- -- Make S_SLURM_RESTART_COUNT item available to SPANK.
- -- Munge plugins - Add sleep between retries if can't connect to socket.
- -- Fix the database query to return all pending jobs in a given time interval.
- -- switch/nrt - Correct logic to get dynamic window count.
- -- Remove need to use job->ctx_params in the launch plugin, just to simplify
- code.
- -- NRT - Fix possible memory leak if using multiple adapters.
- -- NRT - Fix issue where there are more than NRT_MAXADAPTERS on a system.
- -- NRT - Increase Max number of adapters from 8 -> 9
- -- NRT - Initialize missing variables when the PMD is starting a job.
- -- NRT - Fix issue where we are launching hosts out of numerical order,
- this would cause pmd's to hang.
- -- NRT - Change xmalloc's to malloc just to be safe.
- -- NRT - Sanity check to make sure a jobinfo is there before packing.
- -- Add missing options to the print of TaskPluginParam.
- -- Fix a couple of issues with scontrol reconfig and adding nodes to
- slurm.conf. Rebooting daemons after adding nodes to the slurm.conf
- is highly recommended.
- * Changes in Slurm 2.6.6
- ========================
- -- sched/backfill - Fix bug that could result in failing to reserve resources
- for high priority jobs.
- -- Correct job RunTime if requeued from suspended state.
- -- Reset job priority from zero (held) on manual resume from suspend state.
- -- If FastSchedule=0 then do not DOWN a node with low memory or disk size.
- -- Remove vestigial note.
- -- Update sshare.1 man page making it consistent with sacctmgr.1.
- -- Do not reset a job's priority when the slurmctld restarts if previously
- set to some specific value.
- -- sview - Fix regression where the Node tab wasn't able to add/remove columns.
- -- Fix slurmstepd lock when job terminates inside the infiniband
- network traffic accounting plugin.
- -- Correct the documentation to read filesystem instead of Lustre. Update
- the srun help.
- -- Fix the acct_gather_filesystem_lustre.c to compute the Lustre accounting
- data correctly accumulating differences between sampling intervals.
- Fix the data structure mismatch between acct_gather_filesystem_lustre.c
- and slurm_jobacct_gather.h which caused the hdf5 plugin to log incorrect
- data.
- -- Don't allow PMI_TIME to be zero which will cause floating exception.
- -- Fix purging of old reservation errors in database.
- -- MYSQL - If starting the plugin and the database isn't up attempt to
- connect in a loop instead of producing a fatal.
- -- BLUEGENE - If IONodesPerMP changes in bluegene.conf recalculate bitmaps
- based on ionode count correctly on slurmctld restart.
- -- Fix step allocation when some CPUs are not available due to memory limits.
- This happens when one step is active and using memory that blocks the
- scheduling of another step on a portion of the CPUs needed. The new step
- is now delayed rather than aborting with "Requested node configuration is
- not available".
- -- Make sure node limits get assessed if no node count was given in request.
- -- Removed obsolete slurm_terminate_job() API.
- -- Update documentation about QOS limits
- -- Retry task exit message from slurmstepd to srun on message timeout.
- -- Correction to logic reserving all nodes in a specified partition.
- -- Added support for selecting AMD GPU by setting GPU_DEVICE_ORDINAL env var.
- -- Properly enforce GrpSubmit limit for job arrays.
- -- CRAY - fix issue with using CR_ONE_TASK_PER_CORE
- -- CRAY - fix memory leak when using accelerators
- * Changes in Slurm 2.6.5
- ========================
- -- Correction to hostlist parsing bug introduced in v2.6.4 for hostlists with
- more than one numeric range in brackets (e.g. "rack[0-3]_blade[0-63]").
- -- Add notification if using proctrack/cgroup and task/cgroup when oom hits.
- -- Corrections to advanced reservation logic with overlapping jobs.
- -- job_submit/lua - add cpus_per_task field to those available.
- -- Add cpu_load to the node information available using the Perl API.
- -- Correct a job's GRES allocation data in accounting records for non-Cray
- systems.
- -- Substantial performance improvement for systems with Shared=YES or FORCE
- and large numbers of running jobs (replace bubble sort with quick sort).
- -- proctrack/cgroup - Add locking to prevent race condition where one job step
- is ending for a user or job at the same time another job step is starting
- and the user or job container is deleted from under the starting job step.
- -- Fixed sh5util loop when there are no node-step files.
- -- Fix race condition on batch job termination that could result in a job exit
- code of 0xfffffffe if the slurmd on node zero registers its active jobs at
- the same time that slurmstepd is recording the job's exit code.
- -- Correct logic returning remaining job dependencies in job information
- reported by scontrol and squeue. Eliminates vestigial descriptors with
- no job ID values (e.g. "afterany").
- -- Improve performance of REQUEST_JOB_INFO_SINGLE RPC by removing unnecessary
- locks and use hash function to find the desired job.
- -- jobcomp/filetxt - Reopen the file when slurmctld daemon is reconfigured
- or gets SIGHUP.
- -- Remove notice of CVE with very old/deprecated versions of Slurm in
- news.html.
- -- Fix if hwloc_get_nbobjs_by_type() returns zero core count (set to 1).
- -- Added ApbasilTimeout parameter to the cray.conf configuration file.
- -- Handle in the API if parts of the node structure are NULL.
- -- Fix srun hang when IO fails to start at launch.
- -- Fix for GRES bitmap not matching the GRES count resulting in abort
- (requires manual resetting of GRES count, changes to gres.conf file,
- and slurmd restarts).
- -- Modify sview to better support job arrays.
- -- Modify squeue to support longer job ID values (for many job array tasks).
- -- Fix race condition in authentication credential creation that could corrupt
- memory. (NOTE: This race condition has existed since 2003 and would be
- exceedingly rare.)
- -- HDF5 - Fix minor memory leak.
- -- Slurmstepd variable initialization - Without this patch, free() is called
- on a random memory location (i.e. whatever is on the stack), which can
- result in slurmstepd dying and a completed job not being purged in a
- timely fashion.
- -- Fix slurmstepd race condition when separate threads are reading and
- modifying the job's environment, which can result in the slurmstepd failing
- with an invalid memory reference.
- -- Fix erroneous error messages when running gang scheduling.
- -- Fix minor memory leak.
- -- scontrol modified to suspend, resume, hold, uhold, or release multiple
- jobs in a space separated list.
- -- Minor debug error when a connection goes away at the end of a job.
- -- Validate return code from calls to slurm_get_peer_addr
- -- BGQ - Fix issues with making sure all cnodes are accounted for when multiple
- steps cause multiple cnodes in one allocation to go into error at the
- same time.
- -- scontrol show job - Correct NumNodes value calculated based upon job
- specifications.
- -- BGQ - Fix issue if user runs multiple sub-block jobs inside a multiple
- midplane block that starts on a higher coordinate than it ends (i.e. if a
- block has midplanes [0010,0013] 0013 is the start even though it is
- listed second in the hostlist).
- -- BGQ - Add midplane to the total_cnodes used in the runjob_mux plugin
- for better debug.
- -- Update AllocNodes paragraph in slurm.conf.5.
- * Changes in Slurm 2.6.4
- ========================
- -- Fixed sh5util to print its usage.
- -- Corrected commit f9a3c7e4e8ec.
- -- Honor ntasks-per-node option with exclusive node allocations.
- -- sched/backfill - Prevent invalid memory reference if bf_continue option is
- configured and slurm is reconfigured during one of the sleep cycles or if
- there are any changes to the partition configuration or if the normal
- scheduler runs and starts a job that the backfill scheduler is actively
- working on.
- -- Update man pages information about acct-freq and JobAcctGatherFrequency
- to reflect only the latest supported format.
- -- Minor document update to include note about PrivateData=Usage for the
- slurm.conf when using the DBD.
- -- Expand information reported with DebugFlags=backfill.
- -- Initiate jobs pending to run in a reservation as soon as the reservation
- becomes active.
- -- Purged expired reservation even if it has pending jobs.
- -- Corrections to calculation of a pending job's expected start time.
- -- Remove some vestigial logic treating job priority of 1 as a special case.
- -- Memory freeing up to avoid minor memory leaks at close of daemons
- -- Updated documentation to give correct units being displayed.
- -- Report AccountingStorageBackupHost with "scontrol show config".
- -- init scripts ignore quotes around Pid file name specifications.
- -- Fixed typo about command case in quickstart.html.
- -- task/cgroup - handle new cpuset files, similar to commit c4223940.
- -- Replace the tempname() function call with mkstemp().
- -- Fix for --cpu_bind=map_cpu/mask_cpu/map_ldom/mask_ldom plus
- --mem_bind=map_mem/mask_mem options, broken in 2.6.2.
- -- Restore default behavior of allocating cores to jobs on a cyclic basis
- across the sockets unless SelectTypeParameters=CR_CORE_DEFAULT_DIST_BLOCK
- or user specifies other distribution options.
- -- Enforce JobRequeue configuration parameter on node failure. Previously
- always requeued the job.
- -- acct_gather_energy/ipmi - Add delay before retry on read error.
- -- select/cons_res with GRES and multiple threads per core, fix possible
- infinite loop.
- -- proctrack/cgroup - Add cgroup create retry logic in case one step is
- starting at the same time as another step is ending and the logic to create
- and delete cgroups overlaps.
- -- Improve setting of job wait "Reason" field.
- -- Correct sbatch documentation and job_submit/pbs plugin "%j" is job ID,
- not "%J" (which is job_id.step_id).
- -- Improvements to sinfo performance, especially for large numbers of
- partitions.
- -- SlurmdDebug - Permit changes to slurmd debug level with "scontrol reconfig"
- -- smap - Avoid invalid memory reference with hidden nodes.
- -- Fix sacctmgr modify qos set preempt+/-=.
- -- BLUEGENE - fix issue where node count wasn't set up correctly when srun
- performs the allocation, regression in 2.6.3.
- -- Add support for dependencies of job array elements (e.g.
- "sbatch --depend=afterok:123_4 ...") or all elements of a job array (e.g.
- "sbatch --depend=afterok:123 ...").
- -- Add support for new options in sbatch qsub wrapper:
- -W block=true (wait for job completion)
- Clear PBS_NODEFILE environment variable
- -- Fixed the MaxSubmitJobsPerUser limit in QOS which limited submissions
- a job too early.
- -- sched/wiki, sched/wiki2 - Fix to work with change logic introduced in
- version 2.6.3 preventing Maui/Moab from starting jobs.
- -- Updated the QOS limits documentation and man page.
- * Changes in Slurm 2.6.3
- ========================
- -- Add support for some new #PBS options in sbatch scripts and qsub wrapper:
- -l accelerator=true|false (GPU use)
- -l mpiprocs=# (processors per node)
- -l naccelerators=# (GPU count)
- -l select=# (node count)
- -l ncpus=# (task count)
- -v key=value (environment variable)
- -W depend=opts (job dependencies, including "on" and "before" options)
- -W umask=# (set job's umask)
- -- Added qalter and qrerun commands to torque package.
- -- Corrections to qstat logic: job CPU count and partition time format.
- -- Add job_submit/pbs plugin to translate PBS job dependency options to the
- extent possible (no support for PBS "before" options) and set some PBS
- environment variables.
- -- Add spank/pbs plugin to set a bunch of PBS environment variables.
- -- Backported sh5util from master to 2.6 as there are some important
- bugfixes and the new item extraction feature.
- -- select/cons_res - Correct MaxCPUsPerNode partition constraint for CR_Socket.
- -- scontrol - for setdebugflags command, avoid parsing "-flagname" as an
- scontrol command line option.
- -- Fix issue with step accounting if a job is requeued.
- -- Close file descriptors on exec of prolog, epilog, etc.
- -- Fix issue when a user has held a job and then sets the begin time
- into the future.
- -- Scontrol - Enable changing a job's stdout file.
- -- Fix issues where memory or node count of a srun job is altered while the
- srun is pending. The step creation would use the old values and possibly
- hang srun since the step wouldn't be able to be created in the modified
- allocation.
- -- Add support for new SchedulerParameters value of "bf_max_job_part", the
- maximum depth the backfill scheduler should go in any single partition.
- -- acct_gather/infiniband plugin - Correct packets_in/out values.
- -- BLUEGENE - Don't ignore a conn-type request from the user.
- -- BGQ - Force a request on a Q for a MESH to be a TORUS in a dimension that
- can only be a TORUS (1).
- -- Change max message length from 100MB to 1GB before generating "Insane
- message length" error.
- -- sched/backfill - Prevent possible memory corruption due to use of
- bf_continue option and long running scheduling cycle (pending jobs could
- have been cancelled and purged).
- -- CRAY - fix AcceleratorAllocation depth correctly for basil 1.3
- -- Created the environment variable SLURM_JOB_NUM_NODES for srun jobs and
- updated the srun man page.
- -- BLUEGENE/CRAY - Don't set env variables that pertain to a node when Slurm
- isn't doing the launching.
- -- gres/gpu and gres/mic - Do not treat the existence of an empty gres.conf
- file as a fatal error.
- -- Fix for the time specification days-0:min not being parsed correctly
- when hours are specified as 0.
- -- switch/nrt - Fix for memory leak.
- -- Subtract the PMII_COMMANDLEN_SIZE in contribs/pmi2/pmi2_api.c to prevent
- certain implementations of snprintf() from segfaulting.
- * Changes in Slurm 2.6.2
- ========================
- -- Fix issue with reconfig and GrpCPURunMins
- -- Fix of wrong node/job state problem after reconfig
- -- Allow users who are coordinators to update their own limits in the accounts
- they are coordinators over.
- -- BackupController - Make sure we have a connection to the DBD first thing
- to avoid it thinking we don't have a cluster name.
- -- Correct value of min_nodes returned by loading job information to consider
- the job's task count and maximum CPUs per node.
- -- If running jobacct_gather/none fix issue on unpacking step completion.
- -- Reservation with CoreCnt: Avoid possible invalid memory reference.
- -- sjstat - Add man page when generating rpms.
- -- Make sure GrpCPURunMins is added when creating a user, account or QOS with
- sacctmgr.
- -- Fix for invalid memory reference due to multiple free calls caused by
- job arrays submitted to multiple partitions.
- -- Enforce --ntasks-per-socket=1 job option when allocating by socket.
- -- Validate permissions of key directories at slurmctld startup. Report
- anything that is world writable.
- -- Improve GRES support for CPU topology. Previous logic would pick CPUs then
- reject jobs that can not match GRES to the allocated CPUs. New logic first
- filters out CPUs that can not use the GRES, next picks CPUs for the job,
- and finally picks the GRES that best match those CPUs.
- -- Switch/nrt - Prevent invalid memory reference when allocating single adapter
- per node of specific adapter type
- -- CRAY - Make Slurm work with CLE 5.1.1
- -- Fix segfault if submitting to multiple partitions and holding the job.
- -- Use MAXPATHLEN instead of the hardcoded value 1024 for maximum file path
- lengths.
- -- If OverTimeLimit is defined do not declare failed those jobs that ended
- in the OverTimeLimit interval.
- * Changes in Slurm 2.6.1
- ========================
- -- slurmdbd - Allow job derived ec and comments to be modified by non-root
- users.
- -- Fix issue with job name being truncated to 24 chars when sending a mail
- message.
- -- Fix minor issues with spec file, missing files and including files
- erroneously on a bluegene system.
- -- sacct - fix --name and --partition options when using
- accounting_storage/filetxt.
- -- squeue - Remove extra whitespace of default printout.
- -- BGQ - added head ppcfloor as an include dir when building.
- -- BGQ - Better debug messages in runjob_mux plugin.
- -- PMI2 Updated the Makefile.am to build a versioned library.
- -- CRAY - Fix srun --mem_bind=local option with launch/aprun.
- -- PMI2 Corrected buffer size computation in the pmi2_api.c module.
- -- GRES accounting data wrong in database: gres_alloc, gres_req, and gres_used
- fields were empty if the job was not started immediately.
- -- Fix sbatch and srun task count logic when --ntasks-per-node specified,
- but no explicit task count.
- -- Corrected the hdf5 profile user guide and the acct_gather.conf
- documentation.
- -- IPMI - Fix Math bug getting new wattage.
- -- Corrected the AcctGatherProfileType documentation in slurm.conf
- -- Corrected the sh5util program to print the header in the csv file
- only once, set the debug messages at debug() level, make the argument
- check case insensitive and avoid printing duplicate \n.
- -- If cannot collect energy values send message to the controller
- to drain the node and log an error in the slurmd log file.
- -- Handle complete removal of CPURunMins time at the end of the job instead
- of at multifactor poll.
- -- sview - Add missing debug_flag options.
- -- PGSQL - Notes about Postgres functionality being removed in the next
- version of Slurm.
- -- MYSQL - fix issue when rolling up usage and events happened when a cluster
- was down (slurmctld not running) during that time period.
- -- sched/wiki2 - Insure that Moab gets current CPU load information.
- -- Prevent infinite loop in parsing configuration if including file containing
- one blank line.
- -- Fix pack and unpack between 2.6 and 2.5.
- -- Fix job state recovery logic in which a job's accounting frequency was
- not set. This would result in a value of 65534 seconds being used (the
- equivalent of NO_VAL in uint16_t), which could result in the job being
- requeued or aborted.
- -- Validate a job's accounting frequency at submission time rather than
- waiting for its initiation to possibly fail.
- -- Fix CPURunMins if a job is requeued from a failed launch.
- -- Fix in accounting_storage/filetxt to correct start times which sometimes
- could end up before the job started.
- -- Fix issue with potentially referencing past an array in parse_time()
- -- CRAY - fix issue with accelerators on a cray when parsing BASIL 1.3 XML.
- -- Fix issue with a 2.5 slurmstepd locking up when talking to a 2.6 slurmd.
- -- Add argument to priority plugin's priority_p_reconfig function to note
- when the association and QOS used_cpu_run_secs field has been reset.
- * Changes in Slurm 2.6.0
- ========================
- -- Fix it so bluegene and serial systems don't get warnings over new NODEDATA
- enum.
- -- When a job is aborted send a message for any tasks that have completed.
- -- Correction to memory per CPU calculation on system with threads and
- allocating cores or sockets.
- -- Requeue batch job if its node reboots (used to abort the job).
- -- Enlarge maximum size of srun's hostlist file.
- -- IPMI - Fix first poll to get correct consumed_energy for a step.
- -- Correction to job state recovery logic that could result in assert failure.
- -- Record partial step accounting record if allocated nodes fail abnormally.
- -- Accounting - fix issue where PrivateData=jobs or users could potentially
- show information to users that had no associations on the system.
- -- Make PrivateData in slurmdbd.conf case insensitive.
- -- sacct/sstat - Add format option ConsumedEnergyRaw to print full energy
- values.
- * Changes in Slurm 2.6.0rc2
- ===========================
- -- HDF5 - Fix issue with Ubuntu where HDF5 development headers are
- overwritten by the parallel versions thus making it so we need to handle
- both cases.
- -- ACCT_GATHER - handle suspending correctly for polling threads.
- -- Make SLURM_DISTRIBUTION env var hold both types of distribution if
- specified.
- -- Remove hardcoded /usr/local from slurm.spec.
- -- Modify slurmctld locking to improve performance under heavy load with
- very large numbers of batch job submissions or job cancellations.
- -- sstat - Fix issue where if -j wasn't given allow last argument to be checked
- for as the job/step id.
- -- IPMI - fix adjustment on poll when using EnergyIPMICalcAdjustment.
- * Changes in Slurm 2.6.0rc1
- ===========================
- -- Added helper script for launching symmetric and MIC-only MPI tasks within
- SLURM (in contribs/mic/mpirun-mic).
- -- Change maximum delay for state save from 2 secs to 5 secs. Make timeout
- configurable at build time by defining SAVE_MAX_WAIT.
- -- Modify slurmctld data structure locking to interleave read and write
- locks rather than always favor write locks over read locks.
- -- Added sacct format option of "ALL" to print all fields.
- -- Deprecate the SchedulerParameters value of "interval" use "bf_interval"
- instead as documented.
- -- Add acct_gather_profile/hdf5 to profile jobs with hdf5
- -- Added MaxCPUsPerNode partition configuration parameter. This can be
- especially useful to schedule systems with GPUs.
- -- Permit "scontrol reboot_node" for nodes in MAINT reservation.
- -- Added "PriorityFlags" value of "SMALL_RELATIVE_TO_TIME". If set, the job's
- size component will be based upon not the job size alone, but the job's
- size divided by its time limit.
- -- Added sbatch option "--ignore-pbs" to ignore "#PBS" options in the batch
- script.
- -- Rename slurm_step_ctx_params_t field from "mem_per_cpu" to "pn_min_memory".
- Job step now accepts memory specification in either per-cpu or per-node
- basis.
- -- Add ability to specify host repetition count in the srun hostfile (e.g.
- "host1*2" is equivalent to "host1,host1").
- * Changes in Slurm 2.6.0pre3
- ============================
- -- Add milliseconds to default log message header (both RFC 5424 and ISO 8601
- time formats). Disable milliseconds logging using the configure
- parameter "--disable-log-time-msec". Default time format changes to
- ISO 8601 (without time zone information). Specify "--enable-rfc5424time"
- to restore the time zone information.
- -- Add username (%u) to the filename pattern in the batch script.
- -- Added options for front end nodes of AllowGroups, AllowUsers, DenyGroups,
- and DenyUsers.
- -- Fix sched/backfill logic to initiate jobs with maximum time limit over the
- partition limit, but the minimum time limit permits it to start.
- -- gres/gpu - Fix for gres.conf file with multiple files on a single line
- using a slurm expression (e.g. "File=/dev/nvidia[0-1]").
- -- Replaced ipmi.conf with generic acct_gather.conf file for all acct_gather
- plugins. For those doing development to use this follow the model set
- forth in the acct_gather_energy_ipmi plugin.
- -- Added more options to update a step's information
- -- Add DebugFlags=ThreadID which will print the thread id of the calling
- thread.
- -- CRAY - Allocate whole node (CPUs) in reservation despite what the
- user requests. We have found any srun/aprun afterwards will work on a
- subset of resources.
- * Changes in Slurm 2.6.0pre2
- ============================
- -- Do not purge inactive interactive jobs that lack a port to ping (added
- for MR+ operation).
- -- Advanced reservations with hostname and core counts now supports asymmetric
- reservations (e.g. specific different core count for each node).
- -- Added slurmctld/dynalloc plugin for MapReduce+ support.
- -- Added "DynAllocPort" configuration parameter.
- -- Added partition parameter of SelectTypeParameters to override system-wide
- value.
- -- Added cr_type to partition_info data structure.
- -- Added allocated memory to node information available (within the existing
- select_nodeinfo field of the node_info_t data structure). Added Allocated
- Memory to node information displayed by sview and scontrol commands.
- -- Make sched/backfill the default scheduling plugin rather than sched/builtin
- (FIFO).
- -- Added support for a job having different priorities in different partitions.
- -- Added new SchedulerParameters configuration parameter of "bf_continue"
- which permits the backfill scheduler to continue considering jobs for
- backfill scheduling after yielding locks even if new jobs have been
- submitted. This can result in lower priority jobs being backfill
- scheduled instead of newly arrived higher priority jobs, but will permit
- more queued jobs to be considered for backfill scheduling.
- -- Added support to purge reservation records from accounting.
- -- Cray - Add support for Basil 1.3
- * Changes in SLURM 2.6.0pre1
- ============================
- -- Add "state" field to job step information reported by scontrol.
- -- Notify srun to retry step creation upon completion of other job steps
- rather than polling. This results in much faster throughput for job step
- execution with --exclusive option.
- -- Added "ResvEpilog" and "ResvProlog" configuration parameters to execute a
- program at the beginning and end of each reservation.
- -- Added "slurm_load_job_user" function. This is a variation of
- "slurm_load_jobs", but accepts a user ID argument, potentially resulting
- in substantial performance improvement for "squeue --user=ID"
- -- Added "slurm_load_node_single" function. This is a variation of
- "slurm_load_nodes", but accepts a node name argument, potentially resulting
- in substantial performance improvement for "sinfo --nodes=NAME".
- -- Added "HealthCheckNodeState" configuration parameter to identify node states
- on which HealthCheckProgram should be executed.
- -- Remove sacct --dump --formatted-dump options which were deprecated in
- 2.5.
- -- Added support for job arrays (phase 1 of effort). See "man sbatch" option
- -a/--array for details.
- -- Add new AccountingStorageEnforce options of 'nojobs' and 'nosteps' which will
- allow the use of accounting features like associations, qos and limits but
- not keep track of jobs or steps in accounting.
- -- Cray - Add new cray.conf parameter of "AlpsEngine" to specify the
- communication protocol to be used for ALPS/BASIL.
- -- select/cons_res plugin: Correction to CPU allocation count logic for
- cores without hyperthreading.
- -- Added new SelectTypeParameter value of "CR_ALLOCATE_FULL_SOCKET".
- -- Added PriorityFlags value of "TICKET_BASED" and merged priority/multifactor2
- plugin into priority/multifactor plugin.
- -- Add "KeepAliveTime" configuration parameter controlling how long sockets
- used for srun/slurmstepd communications are kept alive after disconnect.
- -- Added SLURM_SUBMIT_HOST to salloc, sbatch and srun job environment.
- -- Added SLURM_ARRAY_TASK_ID to environment of job array.
- -- Added squeue --array/-r option to optimize output for job arrays.
- -- Added "SlurmctldPlugstack" configuration parameter for generic stack of
- slurmctld daemon plugins.
- -- Removed contribs/arrayrun tool. Use native support for job arrays.
- -- Modify default installation locations for RPMs to match "make install":
- _prefix /usr/local
- _slurm_sysconfdir %{_prefix}/etc/slurm
- _mandir %{_prefix}/share/man
- _infodir %{_prefix}/share/info
- -- Add acct_gather_energy/ipmi which works off freeipmi for energy gathering
- * Changes in Slurm 2.5.8
- ========================
- -- Fix for slurmctld segfault on NULL front-end reason field.
- -- Avoid gres step allocation errors when a job shrinks in size due to either
- down nodes or explicit resizing. Generated slurmctld errors of this type:
- "step_test ... gres_bit_alloc is NULL"
- -- Fix bug that would leak memory and over-write the AllowGroups field on
- "scontrol reconfig" when AllowNodes is manually changed using scontrol.
- -- Get html/man files to install in correct places with rpms.
- -- Remove --program-prefix from spec file since it appears to be added by
- default and appeared to break other things.
- -- Updated the automake min version in autogen.sh to be correct.
- -- Select/cons_res - Correct total CPU count allocated to a job with
- --exclusive and --cpus-per-task options
- -- switch/nrt - Don't allocate network resources unless job step has 2+ nodes.
- -- select/cons_res - Avoid extraneous "oversubscribe" error messages.
- -- Reorder get config logic to avoid deadlock.
- -- Enforce QOS MaxCPUsMin limit when job submission contains no user-specified
- time limit.
- -- EpilogSlurmctld pthread is passed required arguments rather than a pointer
- to the job record, which under some conditions could be purged and result
- in an invalid memory reference.
- * Changes in Slurm 2.5.7
- ========================
- -- Fix for linking to the select/cray plugin to not give warning about
- undefined variable.
- -- Add missing symbols to the xlator.h
- -- Avoid placing pending jobs in AdminHold state due to backfill scheduler
- interactions with advanced reservation.
- -- Accounting - make average by task not cpu.
- -- CRAY - Change logging of transient ALPS errors from error() to debug().
- -- POE - Correct logic to support poe option "-euidevice sn_all" and
- "-euidevice sn_single".
- -- Accounting - Fix minor initialization error.
- -- POE - Correct logic to support srun network instances count with POE.
- -- POE - With the srun --launch-cmd option, report proper task count when
- the --cpus-per-task option is used without the --ntasks option.
- -- POE - Fix logic binding tasks to CPUs.
- -- sview - Fix race condition where new information could have slipped past
- the node tab and we didn't notice.
- -- Accounting - Fix an invalid memory read when slurmctld sends data about
- start job to slurmdbd.
- -- If a prolog or epilog failure occurs, drain the node rather than setting it
- down and killing all of its jobs.
- -- Priority/multifactor - Avoid underflow in half-life calculation.
- -- POE - pack missing variable to allow fanout (more than 32 nodes)
- -- Prevent clearing reason field for pending jobs. This bug was introduced in
- v2.5.5 (see "Reject job at submit time ...").
- -- BGQ - Fix issue with preemption on sub-block jobs where a job would kill
- all preemptable jobs on the midplane instead of just the ones it needed to.
- -- switch/nrt - Validate dynamic window allocation size.
- -- BGQ - When --geo is requested do not impose the default conn_types.
- -- CRAY - Support CLE 4.2.0
- -- RebootNode logic - Defers (rather than forgets) reboot request with job
- running on the node within a reservation.
- -- switch/nrt - Correct network_id use logic. Correct support for user sn_all
- and sn_single options.
- -- sched/backfill - Modify logic to reduce overhead under heavy load.
- -- Fix job step allocation with --exclusive and --hostlist option.
- -- Select/cons_res - Fix bug resulting in error of "cons_res: sync loop not
- progressing, holding job #"
- -- checkpoint/blcr - Reset max_nodes from zero to NO_VAL on job restart.
- -- launch/poe - Fix for hostlist file support with repeated host names.
- -- priority/multifactor2 - Prevent possible divide by zero.
- -- srun - Don't check for executable if --test-only flag is used.
- -- energy - On a single node only use the last task for gathering energy.
- Since we don't currently track energy usage per task (only per step).
- Otherwise we get double the energy.
- * Changes in Slurm 2.5.6
- ========================
- -- Gres fix for requeued jobs.
- -- Gres accounting - Fix regression in 2.5.5 for keeping track of gres
- requested and allocated.
- * Changes in Slurm 2.5.5
- ========================
- -- Fix for sacctmgr add qos to handle the 'flags' option.
- -- Export SLURM_ environment variables from sbatch, even if "--export"
- option does not explicitly list them.
- -- If node is in more than one partition, correct counting of allocated CPUs.
- -- If step requests more CPUs than possible in specified node count of job
- allocation then return ESLURM_TOO_MANY_REQUESTED_CPUS rather than
- ESLURM_NODES_BUSY and retrying.
- -- CRAY - Fix SLURM_TASKS_PER_NODE to be set correctly.
- -- Accounting - more checks for strings with a possible `'` in it.
- -- sreport - Fix by adding planned down time to utilization reports.
- -- Do not report an error when sstat identifies job steps terminated during
- its execution, but log using debug type message.
- -- Select/cons_res - Permit node removed from job by going down to be returned
- to service and re-used by another job.
- -- Select/cons_res - Tighter packing of job allocations on sockets.
- -- SlurmDBD - fix to allow user root along with the slurm user to register a
- cluster.
- -- Select/cons_res - Fix for support of consecutive node option.
- -- Select/cray - Modify build to enable direct use of libslurm library.
- -- Bug fixes related to job step allocation logic.
- -- Cray - Disable enforcement of MaxTasksPerNode, which is not applicable
- with launch/aprun.
- -- Accounting - When rolling up data from past usage ignore "idle" time from
- a reservation when it has the "Ignore_Jobs" flag set. Since jobs could run
- outside of the reservation in its nodes without this you could have
- double time.
- -- Accounting - Minor fix to avoid reuse of variable erroneously.
- -- Reject job at submit time if the node count is invalid. Previously such a
- job submitted to a DOWN partition would be queued.
- -- Purge vestigial job scripts when the slurmd cold starts or slurmstepd
- terminates abnormally.
- -- Add support for FreeBSD.
- -- Add sanity check for NULL cluster names trying to register.
- -- BGQ - Push action 'D' info to scontrol for admins.
- -- Reset a job's reason from PartitionDown when the partition is set up.
- -- BGQ - Handle issue where blocks would have a pending job on them and
- while it was free cnodes would go into software error and kill the job.
- -- BGQ - Fix issue where if for some reason we are freeing a block with
- a pending job on it we don't kill the job.
- -- BGQ - Fix race condition where a job could have been removed from a block
- without it still existing there. This is extremely rare.
- -- BGQ - Fix for when a step completes in Slurm before the runjob_mux notifies
- the slurmctld there were software errors on some nodes.
- -- BGQ - Fix issue on state recover if block states are not around
- and when reading in state from DB2 we find a block that can't be created.
- You can now do a clean start to rid the bad block.
- -- Modify slurmdbd to retransmit to slurmctld daemon if it is not responding.
- -- BLUEGENE - Fix issue where when doing backfill preemptable jobs were
- never looked at to determine eligibility of backfillable job.
- -- Cray/BlueGene - Disable srun --pty option unless LaunchType=launch/slurm.
- -- CRAY - Fix sanity check for systems with more than 32 cores per node.
- -- CRAY - Remove other objects from MySQL query that are available from
- the XML.
- -- BLUEGENE - Set the geometry of a job when a block is picked and the job
- isn't a sub-block job.
- -- Cray - avoid check of macro versions of CLE for version 5.0.
- -- CRAY - Fix memory issue with reading in the cray.conf file.
- -- CRAY - If hostlist is given with srun make sure the node count is the same
- as the hosts given.
- -- CRAY - If task count specified, but no tasks-per-node, then set the tasks
- per node in the BASIL reservation request.
- -- CRAY - fix issue with --mem option not giving correct amount of memory
- per cpu.
- -- CRAY - Fix if srun --mem is given outside an allocation to set the
- APRUN_DEFAULT_MEMORY env var for aprun. This scenario will not display
- the option when used with --launch-cmd.
- -- Change sview to use GMutex instead of GStaticMutex
- -- CRAY - set APRUN_DEFAULT_MEMORY instead of CRAY_AUTO_APRUN_OPTIONS
- -- sview - fix issue where if a partition was completely in one state the
- cpu count would be reflected correctly.
- -- BGQ - fix for handling half rack system in STATIC or OVERLAP mode to
- implicitly create full system block.
- -- CRAY - Dynamically create BASIL XML buffer to resize as needed.
- -- Fix checking if QOS limit MaxCPUMinsPJ is set along with DenyOnLimit to
- deny the job instead of holding it.
- -- Make sure on systems that use a different launcher than launch/slurm not
- to attempt to signal tasks on the frontend node.
- -- Cray - when a step is requested count other steps running on nodes in the
- allocation as taking up the entire node instead of just part of the node
- allocated. And always enforce exclusive on a step request.
- -- Cray - display correct nodelist, node/cpu count on steps.
- * Changes in Slurm 2.5.4
- ========================
- -- Fix bug in PrologSlurmctld use that would block job steps until node
- responds.
- -- CRAY - If a partition has MinNodes=0 and a batch job doesn't request nodes
- put the allocation to 1 instead of 0 which prevents the allocation from
- happening.
- -- Better debug when the database is down and using the --cluster option in
- the user commands.
- -- When asking for job states with sacct, default to 'now' instead of midnight
- of the current day.
- -- Fix for handling a test-only job or immediate job that fails while being
- built.
- -- Comment out all of the logic in the job_submit/defaults plugin. The logic
- is only an example and not meant for actual use.
- -- Eliminate configuration file 4096 character line limitation.
- -- More robust logic for tree message forward
- -- BGQ - When cnodes fail in a timeout fashion correctly look up parent
- midplane.
- -- Correct sinfo "%c" (node's CPU count) output value for Bluegene systems.
- -- Backfill - Responsive improvements for systems with large numbers of jobs
- (>5000) and using the SchedulerParameters option bf_max_job_user.
- -- slurmstepd: ensure that IO redirection openings from/to files correctly
- handle interruption
- -- BGQ - Able to handle when midplanes go into Hardware::SoftwareFailure
- -- GRES - Correct tracking of specific resources used after slurmctld restart.
- Counts would previously go negative as jobs terminate and decrement from
- a base value of zero.
- -- Fix for priority/multifactor2 plugin to not assert when configured with
- --enable-debug.
- -- Select/cons_res - If the job request specified --ntasks-per-socket and the
- allocation using is cores, then pack the tasks onto the sockets up to the
- specified value.
- -- BGQ - If a cnode goes into an 'error' state and the block containing the
- cnode does not have a job running on it do not resume the block.
- -- BGQ - Handle blocks that don't free themselves in a reasonable time better.
- -- BGQ - Fix for signaling steps when allocation ends before step.
- -- Fix for backfill scheduling logic with job preemption; starts more jobs.
- -- xcgroup - remove bugs with EINTR management in write calls
- -- jobacct_gather - fix total values to not always == the max values.
- -- Fix for handling node registration messages from older versions without
- energy data.
- -- BGQ - Allow user to request full dimensional mesh.
- -- sdiag command - Correction to jobs started value reported.
- -- Prevent slurmctld assert when invalid change to reservation with running
- jobs is made.
- -- BGQ - If signal is NODE_FAIL allow forward even if job is completing
- and timeout in the runjob_mux trying to send in this situation.
- -- BGQ - More robust checking for correct node, task, and ntasks-per-node
- options in srun, and push that logic to salloc and sbatch.
- -- GRES topology bug in core selection logic fixed.
- -- Fix to handle init.d script for querying status and not return 1 on
- success.
- * Changes in SLURM 2.5.3
- ========================
- -- Gres/gpu plugin - If no GPUs requested, set CUDA_VISIBLE_DEVICES=NoDevFiles.
- This bug was introduced in 2.5.2 for the case where a GPU count was
- configured, but without device files.
- -- task/affinity plugin - Fix bug in CPU masks for some processors.
- -- Modify sacct command to get format from SACCT_FORMAT environment variable.
- -- BGQ - Changed order of library inclusions and fixed incorrect declaration
- to compile correctly on newer compilers
- -- Fix for not building sview if glib exists on a system but not the gtk libs.
- -- BGQ - Fix for handling a job cleanup on a small block if the job has long
- since left the system.
- -- Fix race condition in job dependency logic which can result in invalid
- memory reference.
- * Changes in SLURM 2.5.2
- ========================
- -- Fix advanced reservation recovery logic when upgrading from version 2.4.
- -- BLUEGENE - fix for QOS/Association node limits.
- -- Add missing "safe" flag from print of AccountStorageEnforce option.
- -- Fix logic to optimize GRES topology with respect to allocated CPUs.
- -- Add job_submit/all_partitions plugin to set a job's default partition
- to ALL available partitions in the cluster.
- -- Modify switch/nrt logic to permit build without libnrt.so library.
- -- Handle srun task launch failure without duplicate error messages or abort.
- -- Fix bug in QoS limits enforcement when slurmctld restarts and user not yet
- added to the QOS list.
- -- Fix issue where sjstat and sjobexitmod was installed in 2 different RPMs.
- -- Fix for job request of multiple partitions in which some partitions lack
- nodes with required features.
- -- Permit a job to use a QOS they do not have access to if an administrator
- manually set the job's QOS (previously the job would be rejected).
- -- Make more variables available to job_submit/lua plugin: slurm.MEM_PER_CPU,
- slurm.NO_VAL, etc.
- -- Fix topology/tree logic when nodes defined in slurm.conf get re-ordered.
- -- In select/cons_res, correct logic to allocate whole sockets to jobs. Work
- by Magnus Jonsson, Umea University.
- -- In select/cons_res, correct logic when job removed from only some nodes.
- -- Avoid apparent kernel bug in 2.6.32 which apparently is solved in
- at least 3.5.0. This avoids a stack overflow when running jobs on
- more than 120k nodes.
- -- BLUEGENE - If we made a block that isn't runnable because of an overlapping
- block, destroy it correctly.
- -- Switch/nrt - Dynamically load libnrt.so from within the plugin as needed.
- This eliminates the need for libnrt.so on the head node.
- -- BLUEGENE - Fix in reservation logic that could cause abort.
- * Changes in SLURM 2.5.1
- ========================
- -- Correction to hostlist sorting for hostnames that contain two numeric
- components and the first numeric component has various sizes (e.g.
- "rack9blade1" should come before "rack10blade1")
- -- BGQ - Only poll on initialized blocks instead of calling getBlocks on
- each block independently.
- -- Fix of task/affinity plugin logic for Power7 processors having hyper-
- threading disabled (cpu mask has gaps).
- -- Fix of job priority ordering with sched/builtin and priority/multifactor.
- Patch from Chris Read.
- -- CRAY - Fix for setting up the aprun for a large job (+2000 nodes).
- -- Fix for race condition related to compute node boot resulting in node being
- set down with reason of "Node <name> unexpectedly rebooted"
- -- RAPL - Fix for handling errors when opening msr files.
- -- BGQ - Fix for salloc/sbatch to do the correct allocation when asking for
- -N1 -n#.
- -- BGQ - in emulation make it so we can pretend to run large jobs (>64k nodes)
- -- BLUEGENE - Correct method to update conn_type of a job.
- -- BLUEGENE - Fix issue with preemption when needing to preempt multiple jobs
- to make one job run.
- -- Fixed issue where if an srun dies inside of an allocation abnormally it
- would have also killed the allocation.
- -- FRONTEND - fixed issue where if a system's nodes weren't defined in the
- slurm.conf with NodeAddr's signals going to a step could be handled
- incorrectly.
- -- If sched/backfill starts a job with a QOS having NO_RESERVE and no job
- time limit, start it with the partition time limit (or one year if the
- partition has no time limit) rather than NO_VAL (140 year time limit).
- -- Alter hostlist logic to allocate large grid dynamically instead of on
- stack.
- -- Change RPC version checks to support version 2.5 slurmctld with version 2.4
- slurmd daemons.
- -- Correct core reservation logic for use with select/serial plugin.
- -- Exit scontrol command on stdin EOF.
- -- Disable job --exclusive option with select/serial plugin.
- * Changes in SLURM 2.5.0
- ========================
- -- Add DenyOnLimit flag for QOS to deny jobs at submission time if they
- request resources that reach a 'Max' limit.
- -- Permit SlurmUser or operator to change QOS of non-pending jobs (e.g.
- running jobs).
- -- BGQ - move initial poll to beginning of realtime interaction, which will
- also cause it to run if the realtime server ever goes away.
- * Changes in SLURM 2.5.0-rc2
- ============================
- -- Modify sbcast logic to survive slurmd daemon restart while a file
- transmission is in progress.
- -- Add retry logic to munge encode/decode calls. This is needed if the munge
- daemon is under very heavy load (e.g. with 1000 slurmd daemons per compute
- node).
- -- Add launch and acct_gather_energy plugins to RPMs.
- -- Restore support for srun "--mpi=list" option.
- -- CRAY - Introduce step accounting for a Cray.
- -- Modify srun to abandon I/O 60 seconds after the last task ends. Otherwise
- an aborted slurmstepd can cause the srun process to hang indefinitely.
- -- ENERGY - RAPL - alter code to close open files (and only open them once
- where needed)
- -- If the PrologSlurmctld fails, then requeue the job an indefinite number
- of times instead of only one time.
- * Changes in SLURM 2.5.0-rc1
- ============================
- -- Added Prolog and Epilog Guide (web page). Based upon work by Jason Sollom,
- Cray Inc. and used by permission.
- -- Restore gang scheduling functionality. Preemptor was not being scheduled.
- Fix for bugzilla #3.
- -- Add "cpu_load" to node information. Populate CPULOAD in node information
- reported to Moab cluster manager.
- -- Preempt jobs only when insufficient idle resources exist to start job,
- regardless of the node weight.
- -- Added priority/multifactor2 plugin based upon ticket distribution system.
- Work by Janne Blomqvist, Aalto University.
- -- Add SLURM_NODELIST to environment variables available to Prolog and Epilog.
- -- Permit reservations to allow or deny access by account and/or user.
- -- Add ReconfigFlags value of KeepPartState. See "man slurm.conf" for details.
- -- Modify the task/cgroup plugin adding a task_pre_launch_priv function and
- move slurmstepd outside of the step's cgroup. Work by Matthieu Hautreux.
- -- Intel MIC processor support added using gres/mic plugin. BIG thanks to
- Olli-Pekka Lehto, CSC-IT Center for Science Ltd.
- -- Accounting - Change empty jobacctinfo structs to not actually be used
- instead of putting 0's into the database we put NO_VALS and have sacct
- figure out jobacct_gather wasn't used.
- -- Cray - Prevent calling basil_confirm more than once per job using a flag.
- -- Fix bug with topology/tree and job with min-max node count. Now try to
- get max node count rather than minimizing leaf switches used.
- -- Add AccountingStorageEnforce=safe option to provide method to avoid jobs
- launching that wouldn't be able to run to completion because of a
- GrpCPUMins limit.
- -- Add support for RFC 5424 timestamps in logfiles. Disable with configuration
- option of "--disable-rfc5424time". By Janne Blomqvist, Aalto University.
- -- CRAY - Replace srun.pl with launch/aprun plugin to use srun to wrap the
- aprun process instead of a perl script.
- -- srun - Rename --runjob-opts to --launcher-opts to be used on systems other
- than BGQ.
- -- Added new DebugFlags - Energy for AcctGatherEnergy plugins.
- -- start deprecation of sacct --dump --fdump
- -- BGQ - added --verbose=OFF when srun --quiet is used
- -- Added acct_gather_energy/rapl plugin to record power consumption by job.
- Work by Yiannis Georgiou, Martin Perry, et al., Bull
- * Changes in SLURM 2.5.0.pre3
- =============================
- -- Add Google search to all web pages.
- -- Add sinfo -T option to print reservation information. Work by Bill Brophy,
- Bull.
- -- Force slurmd exit after 2 minute wait, even if threads are hung.
- -- Change node_req field in struct job_resources from 8 to 32 bits so we can
- run more than 256 jobs per node.
- -- sched/backfill: Improve accuracy of expected job start with respect to
- reservations.
- -- sinfo partition field size will be set to the length of the longest
- partition name by default.
- -- Make it so the parse_time will return a valid 0 if given epoch time and
- set errno == ESLURM_INVALID_TIME_VALUE on error instead.
- -- Correct srun --no-alloc logic when node count exceeds node list or task
- count is not a multiple of the node count. Work by Hongjia Cao, NUDT.
- -- Completed integration with IBM Parallel Environment including POE and IBM's
- NRT switch library.
- * Changes in SLURM 2.5.0.pre2
- =============================
- -- When running with multiple slurmd daemons per node, enable specifying a
- range of ports on a single line of the node configuration in slurm.conf.
- -- Add reservation flag of Part_Nodes to allocate all nodes in a partition to
- a reservation and automatically change the reservation when nodes are
- added to or removed from the reservation. Based upon work by
- Bill Brophy, Bull.
- -- Add support for advanced reservation for specific cores rather than whole
- nodes. Current limitations: homogeneous cluster, nodes idle when reservation
- created, and no more than one reservation per node. Code is still under
- development. Work by Alejandro Lucero Palau, et al., BSC.
- -- Add DebugFlag of Switch to log switch plugin details.
- -- Correct job node_cnt value in job completion plugin when job fails due to
- down node. Previously was too low by one.
- -- Add new srun option --cpu-freq to enable user control over the job's CPU
- frequency and thus its power consumption. NOTE: cpu frequency is not
- currently preserved for jobs being suspended and later resumed. Work by
- Don Albert, Bull.
- -- Add node configuration information about "boards" and optimize task
- placement on minimum number of boards. Work by Rod Schultz, Bull.
- * Changes in SLURM 2.5.0.pre1
- =============================
- -- Add new output to "scontrol show configuration" of LicensesUsed. Output is
- "name:used/total"
- -- Changed jobacct_gather plugin infrastructure to be cleaner and easier to
- maintain.
- -- Change license option count separator from "*" to ":" for consistency with
- the gres option (e.g. "--licenses=foo:2 --gres=gpu:2"). The "*" will still
- be accepted, but is no longer documented.
- -- Permit more than 100 jobs to be scheduled per node (new limit is 250
- jobs).
- -- Restructure of srun code to allow outside programs to utilize existing
- logic.
- * Changes in SLURM 2.4.6
- ========================
- -- Correct WillRun authentication logic when issued for non-job owner.
- -- BGQ - fix memory leak
- -- BGQ - Fix to check block for action 'D' if it also has nodes in error.
- * Changes in SLURM 2.4.5
- ========================
- -- Cray - On job kill request, send SIGCONT, SIGTERM, wait KillWait and send
- SIGKILL. Previously just sent SIGKILL to tasks.
- -- BGQ - Fix issue when running srun outside of an allocation and only
- specifying the number of tasks and not the number of nodes.
- -- BGQ - validate correct ntasks_per_node
- -- BGQ - when srun -Q is given make runjob be quiet
- -- Modify use of OOM (out of memory protection) for Linux 2.6.36 kernel
- or later. NOTE: If you were setting the environment variable
- SLURMSTEPD_OOM_ADJ=-17, it should be set to -1000 for Linux 2.6.36 kernel
- or later.
- -- BGQ - Fix job step timeout to actually happen when done from within an
- allocation.
- -- Reset node MAINT state flag when a reservation's nodes or flags change.
- -- Accounting - Fix issue where QOS usage was being zeroed out on a
- slurmctld restart.
- -- BGQ - Add 64 tasks per node as a valid option for srun when used with
- overcommit.
- -- BLUEGENE - With Dynamic layout mode - Fix issue where if a larger block
- was already in error and isn't deallocating and underlying hardware goes
- bad one could get overlapping blocks in error making the code assert when
- a new job request comes in.
- -- BGQ - handle pending actions on a block better when trying to deallocate it.
- -- Accounting - Fixed issue where if nodenames have changed on a system and
- you query against that with -N and -E you will get all jobs during that
- time instead of only the ones running on -N.
- -- BGP - Fix for HTC mode
- -- Accounting - If a job start message fails to the SlurmDBD reset the db_inx
- so it gets sent again. This isn't a major problem since the start will
- happen when the job ends, but this does make things cleaner.
- -- If an salloc is waiting for an allocation to happen and is canceled by the
- user mark the state canceled instead of completed.
- -- Fix issue in accounting if a user puts a '\' in their job name.
- -- Accounting - Fix for if asking for users or accounts that were deleted
- with associations get the deleted associations as well.
- -- BGQ - Handle shared blocks that need to be removed and have jobs running
- on them. This should only happen in extreme conditions.
- -- Fix inconsistency for hostlists that have more than 1 range.
- -- BGQ - Add mutex around recovery for the Real Time server to avoid hitting
- DB2 so hard.
- -- BGQ - If an allocation exists on a block that has a 'D' action on it fail
- job on future step creation attempts.
- * Changes in SLURM 2.4.4
- ========================
- -- BGQ - minor fix to make build work in emulated mode.
- -- BGQ - Fix if large block goes into error and the next highest priority jobs
- are planning on using the block. Previously it would fail those jobs
- erroneously.
- -- BGQ - Fix issue when a cnode going to an error (not SoftwareError) state
- with a job running or trying to run on it.
- -- Execute slurm_spank_job_epilog when there is no system Epilog configured.
- -- Fix for srun --test-only to work correctly with timelimits
- -- BGQ - If a job goes away while still trying to free it up in the
- database, and the job is running on a small block make sure we free up
- the correct node count.
- -- BGQ - Logic added to make sure a job has finished on a block before it is
- purged from the system if its front-end node goes down.
- -- Modify strigger so that a filter option of "--user=0" is supported.
- -- Correct --mem-per-cpu logic for core or socket allocations with multiple
- threads per core.
- -- Fix for older < glibc 2.4 systems to use euidaccess() instead of eaccess().
- -- BLUEGENE - Do not alter a pending job's node count when changing its
- partition.
- -- BGQ - Add functionality to make it so we track the actions on a block.
- This is needed for when a free request is added to a block but there are
- jobs finishing up so we don't start new jobs on the block since they will
- fail on start.
- -- BGQ - Fixed InactiveLimit to work correctly to avoid scenarios where a
- user's pending allocation was started with srun and then for some reason
- the slurmctld was brought down and while it was down the srun was removed.
- -- Fixed InactiveLimit math to work correctly
- -- BGQ - Add logic to make it so blocks can't use a midplane with a nodeboard
- in error for passthrough.
- -- BGQ - Make it so if a nodeboard goes in error any block using that midplane
- for passthrough gets removed on a dynamic system.
- -- BGQ - Fix for printing realtime server debug correctly.
- -- BGQ - Cleaner handling of cnode failures when reported through the runjob
- interface instead of through the normal method.
- -- smap - spread node information across multiple lines for larger systems.
- -- Cray - Defer salloc until after PrologSlurmctld completes.
- -- Correction to slurmdbd communications failure handling logic, incorrect
- error codes returned in some cases.
- * Changes in SLURM 2.4.3
- ========================
- -- Accounting - Fix so complete 32 bit numbers can be put in for a priority.
- -- cgroups - fix if initial directory is non-existent SLURM creates it
- correctly. Before the errno wasn't being checked correctly
- -- BGQ - fixed srun when only requesting a task count and not a node count
- to operate the same way salloc or sbatch did and assign a task per cpu
- by default instead of task per node.
- -- Fix salloc --gid to work correctly. Reported by Brian Gilmer
- -- BGQ - fix smap to set the correct default MloaderImage
- -- BLUEGENE - updated documentation.
- -- Close the batch job's environment file when it contains no data to avoid
- leaking file descriptors.
- -- Fix sbcast's credential to last till the end of a job instead of the
- previous 20 minute time limit. The previous behavior would fail for
- large files 20 minutes into the transfer.
- -- Return ESLURM_NODES_BUSY rather than ESLURM_NODE_NOT_AVAIL error on job
- submit when required nodes are up, but completing a job or in exclusive
- job allocation.
- -- Add HWLOC_FLAGS so linking to libslurm works correctly
- -- BGQ - If using backfill and a shared block is running at least one job
- and a job comes through backfill and can fit on the block without ending
- jobs don't set an end_time for the running jobs since they don't need to
- end to start the job.
- -- Initialize bind_verbose when using task/cgroup.
- -- BGQ - Fix for handling backfill much better when sharing blocks.
- -- BGQ - Fix for making small blocks on first pass if not sharing blocks.
- -- BLUEGENE - Remove force of default conn_type instead of leaving NAV
- when none are requested. The Block allocator sets it up temporarily so
- this isn't needed.
- -- BLUEGENE - Fix deadlock issue when dealing with bad hardware if using
- static blocks.
- -- Fix to mysql plugin during rollup to only query suspended table when jobs
- reported some suspended time.
- -- Fix compile with glibc 2.16 (Kacper Kowalik)
- -- BGQ - fix for deadlock where a block has error on it and all jobs
- running on it are preemptable by scheduling job.
- -- proctrack/cgroup: Exclude internal threads from "scontrol list pids".
- Patch from Matthieu Hautreux, CEA.
- -- Memory leak fixed for select/linear when preempting jobs.
- -- Fix if updating begin time of a job to update the eligible time in
- accounting as well.
- -- BGQ - make it so you can signal steps when signaling the job allocation.
- -- BGQ - Remove extra overhead if a large block has many cnode failures.
- -- Priority/Multifactor - Fix issue with age factor when a job is estimated to
- start in the future but is able to run now.
- -- CRAY - update to work with ALPS 5.1
- -- BGQ - Handle issue of speed and mutexes when polling instead of using the
- realtime server.
- -- BGQ - Fix minor sorting issue with sview when sorting by midplanes.
- -- Accounting - Fix for handling per user max node/cpus limits on a QOS
- correctly for current job.
- -- Update documentation for -/+= when updating a reservation's
- users/accounts/flags
- -- Update pam module to work if using aliases on nodes instead of actual
- host names.
- -- Correction to task layout logic in select/cons_res for job with minimum
- and maximum node count.
- -- BGQ - Put final poll after realtime comes back into service to avoid
- having the realtime server go down over and over again while waiting
- for the poll to finish.
- -- task/cgroup/memory - ensure that ConstrainSwapSpace=no is correctly
- handled. Work by Matthieu Hautreux, CEA.
- -- CRAY - Fix for sacct -N option to work correctly
- -- CRAY - Update documentation to describe installation from rpm instead
- of previous piecemeal method.
- -- Fix sacct to work with QOS' that have previously been deleted.
- -- Added all available limits to the output of sacctmgr list qos
- * Changes in SLURM 2.4.2
- ========================
- -- BLUEGENE - Correct potential deadlock issue when hardware goes bad and
- there are jobs running on that hardware.
- -- If job is submitted to more than one partition, its partition pointer can
- be set to an invalid value. This can result in the count of CPUs allocated
- on a node being bad, resulting in over- or under-allocation of its CPUs.
- Patch by Carles Fenoy, BSC.
- -- Fix bug in task layout with select/cons_res plugin and --ntasks-per-node
- option. Patch by Martin Perry, Bull.
- -- BLUEGENE - remove race condition where if a block is removed while waiting
- for a job to finish on it the number of unused cpus wasn't updated
- correctly.
- -- BGQ - make sure we have a valid block when creating or finishing a step
- allocation.
- -- BLUEGENE - If a large block (> 1 midplane) is in error and underlying
- hardware is marked bad remove the larger block and create a block over
- just the bad hardware making the other hardware available to run on.
- -- BLUEGENE - Handle job completion correctly if an admin removes a block
- where other blocks on an overlapping midplane are running jobs.
- -- BLUEGENE - correctly remove running jobs when freeing a block.
- -- BGQ - correct logic to place multiple (< 1 midplane) steps inside a
- multi midplane block allocation.
- -- BGQ - Make it possible for a multi midplane allocation to run on more
- than 1 midplane but not the entire allocation.
- -- BGL - Fix for syncing users on block from Tim Wickberg
- -- Fix initialization of protocol_version for some messages to make sure it
- is always set when sending or receiving a message.
- -- Reset backfilled job counter only when explicitly cleared using scontrol.
- Patch from Alejandro Lucero Palau, BSC.
- -- BLUEGENE - Fix for handling blocks when a larger block will not free and
- while it is attempting to free underlying hardware is marked in error
- making small blocks overlapping with the freeing block. This only
- applies to dynamic layout mode.
- -- Cray and BlueGene - Do not treat lack of usable front-end nodes when
- slurmctld daemon starts as a fatal error. Also preserve correct front-end
- node for jobs when there is more than one front-end node and the slurmctld
- daemon restarts.
- -- Correct parsing of srun/sbatch input/output/error file names so that only
- the name "none" is mapped to /dev/null and not any file name starting
- with "none" (e.g. "none.o").
- -- BGQ - added version string to the load of the runjob_mux plugin to verify
- the current plugin has been loaded when using runjob_mux_refresh_config
- -- CGROUPS - Use system mount/umount function calls instead of doing fork
- exec of mount/umount from Janne Blomqvist.
- -- BLUEGENE - correct start time setup when no jobs are blocking the way
- from Mark Nelson
- -- Fixed sacct --state=S query to return information about suspended jobs
- current or in the past.
- -- FRONTEND - Made error warning more apparent if a frontend node isn't
- configured correctly.
- -- BGQ - update documentation about runjob_mux_refresh_config which works
- correctly as of IBM driver V1R1M1 efix 008.
- * Changes in SLURM 2.4.1
- ========================
- -- Fix bug for job state change from 2.3 -> 2.4 job state can now be preserved
- correctly when transitioning. This also applies for 2.4.0 -> 2.4.1, no
- state will be lost. (Thanks to Carles Fenoy)
- * Changes in SLURM 2.4.0
- ========================
- -- Cray - Improve support for zero compute node resource allocations.
- Partition used can now be configured with no nodes.
- -- BGQ - make it so srun -i<taskid> works correctly.
- -- Fix parse_uint32/16 to complain if a non-digit is given.
- -- Add SUBMITHOST to job state passed to Moab via sched/wiki2. Patch by Jon
- Bringhurst (LANL).
- -- BGQ - Fix issue when running with AllowSubBlockAllocations=Yes without
- compiling with --enable-debug
- -- Modify scontrol to require "-dd" option to report batch job's script. Patch
- from Don Albert, Bull.
- -- Modify SchedulerParameters option to match documentation: "bf_res="
- changed to "bf_resolution=". Patch from Rod Schultz, Bull.
- -- Fix bug that clears job pending reason field. Patch from Don Lipari, LLNL.
- -- In etc/init.d/slurm move check for scontrol after sourcing
- /etc/sysconfig/slurm. Patch from Andy Wettstein, University of Chicago.
- -- Fix in scheduling logic that can delay jobs with min/max node counts.
- -- BGQ - fix issue where if a step uses the entire allocation and then
- the next step in the allocation only uses part of the allocation it gets
- the correct cnodes.
- -- BGQ - Fix checking for IO on a block with new IBM driver V1R1M1 previous
- function didn't always work correctly.
- -- BGQ - Fix issue when a nodeboard goes down and you want to combine blocks
- to make a larger small block and are running with sub-blocks.
- -- BLUEGENE - Better logic for making small blocks around bad nodeboard/card.
- -- BGQ - When using an old IBM driver cnodes that go into error because of
- a job kill timeout aren't always reported to the system. This is now
- handled by the runjob_mux plugin.
- -- BGQ - Added information on how to setup the runjob_mux to run as SlurmUser.
- -- Improve memory consumption on step layouts with high task count.
- -- BGQ - quieter debug when the real time server comes back but there are
- still messages we find when we poll but haven't given it back to the real
- time yet.
- -- BGQ - fix for if a request comes in smaller than the smallest block and
- we must use a small block instead of a shared midplane block.
- -- Fix issues on large jobs (>64k tasks) to have the correct counter type when
- packing the step layout structure.
- -- BGQ - fix issue where if a user was asking for tasks and ntasks-per-node
- but not node count the node count is correctly figured out.
- -- Move logic to always use the 1st alphanumeric node as the batch host for
- batch jobs.
- -- BLUEGENE - fix race condition where if a nodeboard/card goes down at the
- same time a block is destroyed and that block just happens to be the
- smallest overlapping block over the bad hardware.
- -- Fix bug when querying accounting looking for a job node size.
- -- BLUEGENE - fix possible race condition if cleaning up a block and the
- removal of the job on the block failed.
- -- BLUEGENE - fix issue if a cable was in an error state make it so we can
- check if a block is still makable if the cable wasn't in error.
- -- Put nodes names in alphabetic order in node table.
- -- If preempted job should have a grace time and preempt mode is not cancel
- but job is going to be canceled because it is interactive or other reason
- it now receives the grace time.
- -- BGQ - Modified documents to explain new plugin_flags needed in bg.properties
- in order for the runjob_mux to run correctly.
- -- BGQ - change linking from libslurm.o to libslurmhelper.la to avoid warning.
- * Changes in SLURM 2.4.0.rc1
- =============================
- -- Improve task binding logic by making fuller use of HWLOC library,
- especially with respect to Opteron 6000 series processors. Work contributed
- by Komoto Masahiro.
- -- Add new configuration parameter PriorityFlags, based upon work by
- Carles Fenoy (Barcelona Supercomputer Center).
- -- Modify the step completion RPC between slurmd and slurmstepd in order to
- eliminate a possible deadlock. Based on work by Matthieu Hautreux, CEA.
- -- Change the owner of slurmctld and slurmdbd log files to the appropriate
- user. Without this change the files will be created by and owned by the
- user starting the daemons (likely user root).
- -- Reorganize the slurmstepd logic in order to better support NFS and
- Kerberos credentials via the AUKS plugin. Work by Matthieu Hautreux, CEA.
- -- Fix bug in allocating GRES that are associated with specific CPUs. In some
- cases the code allocated first available GRES to job instead of allocating
- GRES accessible to the specific CPUs allocated to the job.
- -- spank: Add callbacks in slurmd: slurm_spank_slurmd_{init,exit}
- and job epilog/prolog: slurm_spank_job_{prolog,epilog}
- -- spank: Add spank_option_getopt() function to api
- -- Change resolution of switch wait time from minutes to seconds.
- -- Added GrpCPUMins to the output of sshare -l for those using hard limit
- accounting. Work contributed by Mark Nelson.
- -- Added mpi/pmi2 plugin for complete support of pmi2 including acquiring
- additional resources for newly launched tasks. Contributed by Hongjia Cao,
- NUDT.
- -- BGQ - fixed issue where if a user asked for a specific node count and more
- tasks than possible without overcommit the request would be allowed on more
- nodes than requested.
- -- Add support for new SchedulerParameters of bf_max_job_user, maximum number
- of jobs to attempt backfilling per user. Work by Bjørn-Helge Mevik,
- University of Oslo.
- -- BLUEGENE - fixed issue where MaxNodes limit on a partition only limited
- larger than midplane jobs.
- -- Added cpu_run_min to the output of sshare --long. Work contributed by
- Mark Nelson.
- -- BGQ - allow regular users to resolve Rack-Midplane to AXYZ coords.
- -- Add sinfo output format option of "%R" for partition name without "*"
- appended for default partition.
- -- Cray - Add support for zero compute node resource allocation to run batch
- script on front-end node with no ALPS reservation. Useful for pre- or post-
- processing.
- -- Support for cyclic distribution of cpus in task/cgroup plugin from Martin
- Perry, Bull.
- -- GrpMEM limit for QOSes and associations added Patch from Bjørn-Helge Mevik,
- University of Oslo.
- -- Various performance improvements for up to 500% higher throughput depending
- upon configuration. Work supported by the Oak Ridge National Laboratory
- Extreme Scale Systems Center.
- -- Added jobacct_gather/cgroup plugin. It is not advised to use this in
- production as it isn't currently complete and doesn't provide an equivalent
- substitution for jobacct_gather/linux yet. Work by Martin Perry, Bull.
- * Changes in SLURM 2.4.0.pre4
- =============================
- -- Add logic to cache GPU file information (bitmap index mapping to device
- file number) in the slurmd daemon and transfer that information to the
- slurmstepd whenever a job step is initiated. This is needed to set the
- appropriate CUDA_VISIBLE_DEVICES environment variable value when the
- devices are not in strict numeric order (e.g. some GPUs are skipped).
- Based upon work by Nicolas Bigaouette.
- -- BGQ - Remove ability to make a sub-block with a geometry with one or more
- of its dimensions of length 3. There is a limitation in the IBM I/O
- subsystem that is problematic with multiple sub-blocks with a dimension
- of length 3, so we will not allow them to be created. This
- means that if you ask the system for an allocation of 12 c-nodes you will
- be given 16. If this is ever fixed in BGQ you can remove this patch.
- -- BLUEGENE - Better handling blocks that go into error state or deallocate
- while jobs are running on them.
- -- BGQ - fix for handling mix of steps running at same time some of which
- are full allocation jobs, and others that are smaller.
- -- BGQ - fix for core dump after running multiple sub-block jobs on static
- blocks.
- -- BGQ - fixed sync issue where if a job finishes in SLURM but not in mmcs
- for a long time after the SLURM job has been flushed from the system
- we don't have to worry about rebooting the block to sync the system.
- -- BGQ - In scontrol/sview node counts are now displayed with
- CnodeCount/CnodeErrCount so to point out there are cnodes in an error state
- on the block. Draining the block and having it reboot when all jobs are
- gone will clear up the cnodes in Software Failure.
- -- Change default SchedulerParameters max_switch_wait field value from 60 to
- 300 seconds.
- -- BGQ - catch errors from the kill option of the runjob client.
- -- BLUEGENE - make it so the epilog runs until slurmctld tells it the job is
- gone. Previously it had a timelimit which has proven to not be the right
- thing.
- -- FRONTEND - fix issue where if a compute node was in a down state and
- an admin updates the node to idle/resume the compute nodes will go
- instantly to idle instead of idle* which means no response.
- -- Fix regression in 2.4.0.pre3 where number of submitted jobs limit wasn't
- being honored for QOS.
- -- Cray - Enable logging of BASIL communications with environment variables.
- Set XML_LOG to enable logging. Set XML_LOG_LOC to specify path to log file
- or "SLURM" to write to SlurmctldLogFile or unset for "slurm_basil_xml.log".
- Patch from Steve Trofinoff, CSCS.
- -- FRONTEND - if a front end unexpectedly reboots kill all jobs but don't
- mark front end node down.
- -- FRONTEND - don't down a front end node if you have an epilog error
- -- BLUEGENE - if a job has an epilog error don't down the midplane it was
- running on.
- -- BGQ - added new DebugFlag (NoRealTime) for only printing debug from
- state change while the realtime server is running.
- -- Fix multi-cluster mode with sview starting on a non-bluegene cluster going
- to a bluegene cluster.
- -- BLUEGENE - ability to show Rack Midplane name of midplanes in sview and
- scontrol.
- * Changes in SLURM 2.4.0.pre3
- =============================
- -- Let a job be submitted even if it exceeds a QOS limit. Job will be left
- in a pending state until the QOS limit or job parameters change. Patch by
- Phil Eckert, LLNL.
- -- Add sacct support for the option "--name". Work by Yuri D'Elia, Center for
- Biomedicine, EURAC Research, Italy.
- -- BGQ - handle preemption.
- -- Add an srun shepherd process to cancel a job and/or step if the srun
- process is killed abnormally (e.g. SIGKILL).
- -- BGQ - handle deadlock issue when a nodeboard goes into an error state.
- -- BGQ - more thorough handling of blocks with multiple jobs running on them.
- -- Fix man2html process to compile in the build directory instead of the
- source dir.
- -- Behavior of srun --multi-prog modified so that any program arguments
- specified on the command line will be appended to the program arguments
- specified in the program configuration file.
- -- Add new command, sdiag, which reports a variety of job scheduling
- statistics. Based upon work by Alejandro Lucero Palau, BSC.
- -- BLUEGENE - Added DefaultConnType to the bluegene.conf file. This makes it
- so you can specify any connection type you would like (TORUS or MESH) as
- the default in dynamic mode. Previously it always defaulted to TORUS.
- -- Made squeue -n and -w options more consistent with salloc, sbatch, srun,
- and scancel. Patch by Don Lipari, LLNL.
- -- Have sacctmgr remove user records when no associations exist for that user.
- -- Several header file changes for clean build with NetBSD. Patches from
- Aleksej Saushev.
- -- Fix for possible deadlock in accounting logic: Avoid calling
- jobacct_gather_g_getinfo() until there is data to read from the socket.
- -- Fix race condition that could generate "job_cnt_comp underflow" errors on
- front-end architectures.
- -- BGQ - Fix issue where a system with missing cables could cause core dump.
- * Changes in SLURM 2.4.0.pre2
- =============================
- -- CRAY - Add support for GPU memory allocation using SLURM GRES (Generic
- RESource) support. Work by Steve Trofinoff, CSCS.
- -- Add support for job allocations with multiple job constraint counts. For
- example: salloc -C "[rack1*2&rack2*4]" ... will allocate the job 2 nodes
- from rack1 and 4 nodes from rack2. Support for only a single constraint
- name has been added to job step support.
- -- BGQ - Remove old method for marking cnodes down.
- -- BGQ - Remove BGP images from view in sview.
- -- BGQ - print out failed cnodes in scontrol show nodes.
- -- BGQ - Add srun option of "--runjob-opts" to pass options to the runjob
- command.
- -- FRONTEND - handle step launch failure better.
- -- BGQ - Added a mutex to protect the now changing ba_system pointers.
- -- BGQ - added new functionality for sub-block allocations - no preemption
- for this yet though.
- -- Add --name option to squeue to filter output by job name. Patch from Yuri
- D'Elia.
- -- BGQ - Added linking to runjob client library which gives support to totalview
- to use srun instead of runjob.
- -- Add numeric range checks to scontrol update options. Patch from Phil
- Eckert, LLNL.
- -- Add ReconfigFlags configuration option to control actions of "scontrol
- reconfig". Patch from Don Albert, Bull.
- -- BGQ - handle reboots with multiple jobs running on a block.
- -- BGQ - Add message handler thread to forward signals to runjob process.
- * Changes in SLURM 2.4.0.pre1
- =============================
- -- BGQ - use the ba_geo_tables to figure out the blocks instead of the old
- algorithm. This improves timing in the worst cases and simplifies the code
- greatly.
- -- BLUEGENE - Change to output tools labels from BP to Midplane
- (i.e. BP List -> MidplaneList).
- -- BLUEGENE - read MPs and BPs from the bluegene.conf
- -- Modify srun's SIGINT handling logic timer (two SIGINTs within one second) to
- be based on a microsecond rather than a second timer.
- -- Modify advance reservation to accept multiple specific block sizes rather
- than a single node count.
- -- Permit administrator to change a job's QOS to any value without validating
- the job's owner has permission to use that QOS. Based upon patch by Phil
- Eckert (LLNL).
- -- Add trigger flag for a permanent trigger. The trigger will NOT be purged
- after an event occurs, but only when explicitly deleted.
- -- Interpret a reservation with Nodes=ALL and a Partition specification as
- reserving all nodes within the specified partition rather than all nodes
- on the system. Based upon patch by Phil Eckert (LLNL).
- -- Add the ability to reboot all compute nodes after they become idle. The
- RebootProgram configuration parameter must be set and an authorized user
- must execute the command "scontrol reboot_nodes". Patch from Andriy
- Grytsenko (Massive Solutions Limited).
- -- Modify slurmdbd.conf parsing to accept DebugLevel strings (quiet, fatal,
- info, etc.) in addition to numeric values. The parsing of slurm.conf was
- modified in the same fashion for SlurmctldDebug and SlurmdDebug values.
- The output of sview and "scontrol show config" was also modified to report
- those values as strings rather than numeric values.
- -- Changed default value of StateSaveLocation configuration parameter from
- /tmp to /var/spool.
- -- Prevent associations from being deleted if it has any jobs in running,
- pending or suspended state. Previous code prevented this only for running
- jobs.
- -- If a job can not run due to QOS or association limits, then do not cancel
- the job, but leave it pending in a system held state (priority = 1). The
- job will run when its limits or the QOS/association limits change. Based
- upon a patch by Phil Eckert (LLNL).
- -- BGQ - Added logic to keep track of cnodes in an error state inside of a
- booted block.
- -- Added the ability to update a node's NodeAddr and NodeHostName with
- scontrol. Also enable setting a node's state to "future" using scontrol.
- -- Add a node state flag of CLOUD and save/restore NodeAddr and NodeHostName
- information for nodes with a flag of CLOUD.
- -- Cray: Add support for job reservations with node IDs that are not in
- numeric order. Fix for Bugzilla #5.
- -- BGQ - Fix issue with smap -R
- -- Fix association limit support for jobs queued for multiple partitions.
- -- BLUEGENE - fix issue for sub-midplane systems to create a full system
- block correctly.
- -- BLUEGENE - Added option to the bluegene.conf to tell you are running on
- a sub midplane system.
- -- Added the UserID of the user issuing the RPC to the job_submit/lua
- functions.
- -- Fixed issue where if a job ended with ESLURMD_UID_NOT_FOUND and
- ESLURMD_GID_NOT_FOUND where slurm would be a little over zealous
- in treating a missing GID or UID as a fatal error.
- -- If job time limit exceeds partition maximum, but job's minimum time limit
- does not, set job's time limit to partition maximum at allocation time.
- * Changes in SLURM 2.3.6
- ========================
- -- Fix DefMemPerCPU for partition definitions.
- -- Fix to create a reservation with licenses and no nodes.
- -- Fix issue with assoc_mgr if a bad state file is given and the database
- isn't up at the time the slurmctld starts, not running the
- priority/multifactor plugin, and then the database is started up later.
- -- Gres: If a gres has a count of one and an associated file then when doing
- a reconfiguration, the node's bitmap was not cleared resulting in an
- underflow upon job termination or removal from scheduling matrix by the
- backfill scheduler.
- -- Fix race condition in job dependency logic which can result in invalid
- memory reference.
- * Changes in SLURM 2.3.5
- ========================
- -- Improve support for overlapping advanced reservations. Patch from
- Bill Brophy, Bull.
- -- Modify Makefiles for support of Debian hardening flags. Patch from
- Simon Ruderich.
- -- CRAY: Fix support for configuration with SlurmdTimeout=0 (never mark
- node that is DOWN in ALPS as DOWN in SLURM).
- -- Fixed the setting of SLURM_SUBMIT_DIR for jobs submitted by Moab (BZ#1467).
- Patch by Don Lipari, LLNL.
- -- Correction to init.d/slurmdbd exit code for status option. Patch by Bill
- Brophy, Bull.
- -- When the optional max_time is not specified for --switches=count, the site
- max (SchedulerParameters=max_switch_wait=seconds) is used for the job.
- Based on patch from Rod Schultz.
- -- Fix bug in select/cons_res plugin when used with topology/tree and a node
- range count in job allocation request.
- -- Fixed moab_2_slurmdb.pl script to correctly work for end records.
- -- Add support for new SchedulerParameters of max_depend_depth defining the
- maximum number of jobs to test for circular dependencies (i.e. job A waits
- for job B to start and job B waits for job A to start). Default value is
- 10 jobs.
- -- Fix potential race condition if MinJobAge is very low (i.e. 1) and using
- slurmdbd accounting and running large amounts of jobs (>50 sec). Job
- information could be corrupted before it had a chance to reach the DBD.
- -- Fix state restore of job limit set from admin value for min_cpus.
- -- Fix clearing of limit values if an admin removes the limit for max cpus
- and time limit where it was previously set by an admin.
- -- Fix issue where log message is more than 256 chars and then has a format.
- -- Fix sched/wiki2 to support job account name, gres, partition name, wckey,
- or working directory that contains "#" (a job record separator). Also fix
- for wckey or working directory that contains a double quote '\"'.
- -- CRAY - fix for handling memory requests from user for an allocation.
- -- Add support for switches parameter to the job_submit/lua plugin. Work by
- Par Andersson, NSC.
- -- Fix to job preemption logic to preempt multiple jobs at the same time.
- -- Fix minor issue where uid and gid were switched in sview for submitting
- batch jobs.
- -- Fix possible illegal memory reference in slurmctld for job step with
- relative option. Work by Matthieu Hautreux (CEA).
- -- Reset priority of system held jobs when dependency is satisfied. Work by
- Don Lipari, LLNL.
- * Changes in SLURM 2.3.4
- ========================
- -- Set DEFAULT flag in partition structure when slurmctld reads the
- configuration file. Patch from Rémi Palancher.
- -- Fix for possible deadlock in accounting logic: Avoid calling
- jobacct_gather_g_getinfo() until there is data to read from the socket.
- -- Fix typo in accounting when using reservations. Patch from Alejandro
- Lucero Palau.
- -- Fix to the multifactor priority plugin to calculate effective usage earlier
- to give a correct priority on the first decay cycle after a restart of the
- slurmctld. Patch from Martin Perry, Bull.
- -- Permit user root to run a job step for any job as any user. Patch from
- Didier Gazen, Laboratoire d'Aerologie.
- -- BLUEGENE - fix for not allowing jobs if all midplanes are drained and all
- blocks are in an error state.
- -- Avoid slurmctld abort due to bad pointer when setting an advanced
- reservation MAINT flag if it contains no nodes (only licenses).
- -- Fix bug when requeued batch job is scheduled to run on a different node
- zero, but attempts job launch on old node zero.
- -- Fix bug in step task distribution when nodes are not configured in numeric
- order. Patch from Hongjia Cao, NUDT.
- -- Fix for srun allocating running within existing allocation with --exclude
- option and --nnodes count small enough to remove more nodes. Patch from
- Phil Eckert, LLNL.
- -- Work around to handle certain combinations of glibc/kernel
- (i.e. glibc-2.14/Linux-3.1) to correctly open the pty of the slurmstepd
- as the job user. Patch from Mark Grondona, LLNL.
- -- Modify linking to include "-ldl" only when needed. Patch from Aleksej
- Saushev.
- -- Fix smap regression to display nodes that are drained or down correctly.
- -- Several bug fixes and performance improvements related to batch
- scripts containing very large numbers of arguments. Patches from Par
- Andersson, NSC.
- -- Fixed extremely hard to reproduce threading issue in assoc_mgr.
- -- Correct "scontrol show daemons" output if there is more than one
- ControlMachine configured.
- -- Add node read lock where needed in slurmctld/agent code.
- -- Added test for LUA library named "liblua5.1.so.0" in addition to
- "liblua5.1.so" as needed by Debian. Patch by Remi Palancher.
- -- Added partition default_time field to job_submit LUA plugin. Patch by
- Remi Palancher.
- -- Fix bug in cray/srun wrapper stdin/out/err file handling.
- -- In cray/srun wrapper, only include aprun "-q" option when srun "--quiet"
- option is used.
- -- BLUEGENE - fix issue where if a small block was in error it could hold up
- the queue when trying to place a larger than midplane job.
- -- CRAY - ignore all interactive nodes and jobs on interactive nodes.
- -- Add new job state reason of "FrontEndDown" which applies only to Cray and
- IBM BlueGene systems.
- -- Cray - Enable configure option of "--enable-salloc-background" to permit
- the srun and salloc commands to be executed in the background. This does
- NOT remove the ALPS limitation that only one job reservation can be created
- for each Linux session ID.
- -- Cray - For srun wrapper when creating a job allocation, set the default job
- name to the executable file's name.
- -- Add support for Cray ALPS 5.0.0
- -- FRONTEND - if a front end unexpectedly reboots kill all jobs but don't
- mark front end node down.
- -- FRONTEND - don't down a front end node if you have an epilog error.
- -- Cray - fix for if a frontend slurmd was started after the slurmctld had
- already pinged it on startup the unresponding flag would be removed from
- the frontend node.
- -- Cray - Fix issue on smap not displaying grid correctly.
- -- Fixed minor memory leak in sview.
- * Changes in SLURM 2.3.3
- ========================
- -- Fix task/cgroup plugin error when used with GRES. Patch by Alexander
- Bersenev (Institute of Mathematics and Mechanics, Russia).
- -- Permit pending job exceeding a partition limit to run if its QOS flag is
- modified to permit the partition limit to be exceeded. Patch from Bill
- Brophy, Bull.
- -- BLUEGENE - Fixed preemption issue.
- -- sacct search for jobs using filtering was ignoring wckey filter.
- -- Fixed issue with QOS preemption when adding new QOS.
- -- Fixed issue with comment field being used in a job finishing before it
- starts in accounting.
- -- Add slashes in front of derived exit code when modifying a job.
- -- Handle numeric suffix of "T" for terabyte units. Patch from John Thiltges,
- University of Nebraska-Lincoln.
- -- Prevent resetting a held job's priority when updating other job parameters.
- Patch from Alejandro Lucero Palau, BSC.
- -- Improve logic to import a user's environment. Needed with --get-user-env
- option used with Moab. Patch from Mark Grondona, LLNL.
- -- Fix bug in sview layout if node count less than configured grid_x_width.
- -- Modify PAM module to prefer to use SLURM library with same major release
- number that it was built with.
- -- Permit gres count configuration of zero.
- -- Fix race condition where sbcast command can result in deadlock of slurmd
- daemon. Patch by Don Albert, Bull.
- -- Fix bug in srun --multi-prog configuration file to avoid printing duplicate
- record error when "*" is used at the end of the file for the task ID.
- -- Let operators see reservation data even if "PrivateData=reservations" flag
- is set in slurm.conf. Patch from Don Albert, Bull.
- -- Added new sbatch option "--export-file" as needed for latest version of
- Moab. Patch from Phil Eckert, LLNL.
- -- Fix for sacct printing CPUTime(RAW) where the value is greater than a 32 bit
- number.
- -- Fix bug in --switch option with topology resulting in bad switch count use.
- Patch from Alejandro Lucero Palau (Barcelona Supercomputer Center).
- -- Fix PrivateFlags bug when using Priority Multifactor plugin. If using sprio
- all jobs would be returned even if the flag was set.
- Patch from Bill Brophy, Bull.
- -- Fix for possible invalid memory reference in slurmctld in job dependency
- logic. Patch from Carles Fenoy (Barcelona Supercomputer Center).
- * Changes in SLURM 2.3.2
- ========================
- -- Add configure option of "--without-rpath" which builds SLURM tools without
- the rpath option, which will work if Munge and BlueGene libraries are in
- the default library search path and make system updates easier.
- -- Fixed issue where if a job ended with ESLURMD_UID_NOT_FOUND and
- ESLURMD_GID_NOT_FOUND where slurm would be a little over zealous
- in treating a missing GID or UID as a fatal error.
- -- Backfill scheduling - Add SchedulerParameters configuration parameter of
- "bf_res" to control the resolution in the backfill scheduler's data about
- when jobs begin and end. Default value is 60 seconds (used to be 1 second).
- -- Cray - Remove the "family" specification from the GPU reservation request.
- -- Updated set_oomadj.c, replacing deprecated oom_adj reference with
- oom_score_adj
- -- Fix resource allocation bug, generic resources allocation was ignoring the
- job's ntasks_per_node and cpus_per_task parameters. Patch from Carles
- Fenoy, BSC.
- -- Avoid orphan job step if slurmctld is down when a job step completes.
- -- Fix Lua link order, patch from Pär Andersson, NSC.
- -- Set SLURM_CPUS_PER_TASK=1 when user specifies --cpus-per-task=1.
- -- Fix for fatal error managing GRES. Patch by Carles Fenoy, BSC.
- -- Fixed race condition when using the DBD in accounting where if a job
- wasn't started at the time the eligible message was sent but started
- before the db_index was returned information like start time would be lost.
- -- Fix issue in accounting where normalized shares could be updated
- incorrectly when getting fairshare from the parent.
- -- Fixed if not enforcing associations but want QOS support for a default
- qos on the cluster to fill that in correctly.
- -- Fix in select/cons_res for "fatal: cons_res: sync loop not progressing"
- with some configurations and job option combinations.
- -- BLUEGENE - Fixed issue with handling HTC modes and rebooting.
- * Changes in SLURM 2.3.1
- ========================
- -- Do not remove the backup slurmctld's pid file when it assumes control, only
- when it actually shuts down. Patch from Andriy Grytsenko (Massive Solutions
- Limited).
- -- Avoid clearing a job's reason from JobHeldAdmin or JobHeldUser when it is
- otherwise updated using scontrol or sview commands. Patch based upon work
- by Phil Eckert (LLNL).
- -- BLUEGENE - Fix for if changing the defined blocks in the bluegene.conf and
- jobs happen to be running on blocks not in the new config.
- -- Many cosmetic modifications to eliminate warning message from GCC version
- 4.6 compiler.
- -- Fix for sview reservation tab when finding correct reservation.
- -- Fix for handling QOS limits per user on a reconfig of the slurmctld.
- -- Do not treat the absence of a gres.conf file as a fatal error on systems
- configured with GRES, but set GRES counts to zero.
- -- BLUEGENE - Update correctly the state in the reason of a block if an
- admin sets the state to error.
- -- BLUEGENE - handle reason of blocks in error more correctly between
- restarts of the slurmctld.
- -- BLUEGENE - Fix minor potential memory leak when setting block error reason.
- -- BLUEGENE - Fix if running in Static/Overlap mode and full system block
- is in an error state, won't deny jobs.
- -- Fix for accounting where your cluster isn't numbered in counting order
- (i.e. 1-9,0 instead of 0-9). The bug would cause 'sacct -N nodename' to
- not give correct results on these systems.
- -- Fix to GRES allocation logic when resources are associated with specific
- CPUs on a node. Patch from Steve Trofinoff, CSCS.
- -- Fix bugs in sched/backfill with respect to QOS reservation support and job
- time limits. Patch from Alejandro Lucero Palau (Barcelona Supercomputer
- Center).
- -- BGQ - fix to set up corner correctly for sub block jobs.
- -- Major re-write of the CPU Management User and Administrator Guide (web
- page) by Martin Perry, Bull.
- -- BLUEGENE - If removing blocks from system that once existed cleanup of old
- block happens correctly now.
- -- Prevent slurmctld crashing with configuration of MaxMemPerCPU=0.
- -- Prevent job hold by operator or account coordinator of his own job from
- being an Administrator Hold rather than User Hold by default.
- -- Cray - Fix for srun.pl parsing to avoid adding spaces between option and
- argument (e.g. "-N2" parsed properly without changing to "-N 2").
- -- Major updates to cgroup support by Mark Grondona (LLNL) and Matthieu
- Hautreux (CEA) and Sam Lang. Fixes timing problems with respect to the
- task_epilog. Allows cgroup mount point to be configurable. Added new
- configuration parameters MaxRAMPercent and MaxSwapPercent. Allow cgroup
- configuration parameters that are percentages to be floating point.
- -- Fixed issue where sview wasn't displaying correct nice value for jobs.
- -- Fixed issue where sview wasn't displaying correct min memory per node/cpu
- value for jobs.
- -- Disable some SelectTypeParameters for select/linear that aren't compatible.
- -- Move slurm_select_init to proper place to avoid loading multiple select
- plugins in the slurmd.
- -- BGQ - Include runjob_plugin.so in the bluegene rpm.
- -- Report correct job "Reason" if needed nodes are DOWN, DRAINED, or
- NOT_RESPONDING, "Resources" rather than "PartitionNodeLimit".
- -- BLUEGENE - Fixed issues with running on a sub-midplane system.
- -- Added some missing calls to allow older versions of SLURM to talk to newer.
- -- BGQ - allow steps to be run.
- -- Do not attempt to run HealthCheckProgram on powered down nodes. Patch from
- Ramiro Alba, Centre Tecnològic de Transferència de Calor, Spain.
- * Changes in SLURM 2.3.0-2
- ==========================
- -- Fix for memory issue inside sview.
- -- Fix issue where if a job was pending and the slurmctld was restarted a
- variable wasn't initialized in the job structure making it so that job
- wouldn't run.
- * Changes in SLURM 2.3.0
- ========================
- -- BLUEGENE - make sure we only set the jobinfo_select start_loc on a job
- when we are on a small block, not a regular one.
- -- BGQ - fix issue where not copying the correct amount of memory.
- -- BLUEGENE - fix clean start if jobs were running when the slurmctld was
- shutdown and then the system size changed. This would probably only happen
- if you were emulating a system.
- -- Fix sview for calling a cray system from a non-cray system to get the
- correct geometry of the system.
- -- BLUEGENE - fix to correctly import previous version of block state file.
- -- BLUEGENE - handle loading better when doing a clean start with static
- blocks.
- -- Add sinfo format and sort option "%n" for NodeHostName and "%o" for
- NodeAddr.
- -- If a job is deferred due to partition limits, then re-test those limits
- after a partition is modified. Patch from Don Lipari.
- -- Fix bug which would crash slurmctld if job's owner (not root) tries to clear
- a job's licenses by setting value to "".
- -- Cosmetic fix for printing out debug info in the priority plugin.
- -- In sview when switching from a bluegene machine to a regular linux cluster
- and vice versa the node->base partition lists will be displayed if setup
- in your .slurm/sviewrc file.
- -- BLUEGENE - Fix for creating full system static block on a BGQ system.
- -- BLUEGENE - Fix deadlock issue if toggling between Dynamic and Static block
- allocation with jobs running on blocks that don't exist in the static
- setup.
- -- BLUEGENE - Modify code to only give HTC states to BGP systems and not
- allow them on Q systems.
- -- BLUEGENE - Make it possible for an admin to define multiple dimension
- conn_types in a block definition.
- -- BGQ - Alter tools to output multiple dimensional conn_type.
- * Changes in SLURM 2.3.0.rc2
- ============================
- -- With sched/wiki or sched/wiki2 (Maui or Moab scheduler), insure that a
- requeued job's priority is reset to zero.
- -- BLUEGENE - fix to run steps correctly in a BGL/P emulated system.
- -- Fixed issue where if there was a network issue between the slurmctld and
- the DBD where both remained up but were disconnected the slurmctld would
- get registered again with the DBD.
- -- Fixed issue where if the DBD connection from the ctld goes away because of
- a POLLERR the dbd_fail callback is called.
- -- BLUEGENE - Fix to smap command-line mode display.
- -- Change in GRES behavior for job steps: A job step's default generic
- resource allocation will be set to that of the job. If a job step's --gres
- value is set to "none" then none of the generic resources which have been
- allocated to the job will be allocated to the job step.
- -- Add srun environment value of SLURM_STEP_GRES to set default --gres value
- for a job step.
- -- Require SchedulerTimeSlice configuration parameter to be at least 5 seconds
- to avoid thrashing slurmd daemon.
- -- Cray - Fix to make nodes state in accounting consistent with state set by
- ALPS.
- -- Cray - A node DOWN to ALPS will be marked DOWN to SLURM only after reaching
- SlurmdTimeout. In the interim, the node state will be NO_RESPOND. This
- change makes SLURM handling of the node DOWN state more
- consistent with ALPS. This change affects only Cray systems.
- -- Cray - Fix to work with 4.0.* instead of just 4.0.0
- -- Cray - Modify srun/aprun wrapper to map --exclusive to -F exclusive and
- --share to -F share. Note this does not consider the partition's Shared
- configuration, so it is an imperfect mapping of options.
- -- BLUEGENE - Added notice in the print config to tell if you are emulated
- or not.
- -- BLUEGENE - Fix job step scalability issue with large task count.
- -- BGQ - Improved c-node selection when asked for a sub-block job that
- cannot fit into the available shape.
- -- BLUEGENE - Modify "scontrol show step" to show I/O nodes (BGL and BGP) or
- c-nodes (BGQ) allocated to each step. Change field name from "Nodes=" to
- "BP_List=".
- -- Code cleanup on step request to get the correct select_jobinfo.
- -- Memory leak fixed for rolling up accounting with down clusters.
- -- BGQ - fix issue where if first job step is the entire block and then the
- next parallel step is run on a sub block, SLURM won't over subscribe cnodes.
- -- Treat duplicate switch name in topology.conf as fatal error. Patch from Rod
- Schultz, Bull
- -- Minor update to documentation describing the AllowGroups option for a
- partition in the slurm.conf.
- -- Fix problem with _job_create() when not using qos's. It makes
- _job_create() consistent with similar logic in select_nodes().
- -- GrpCPURunMins in a QOS fleshed out.
- -- Fix for squeue -t "CONFIGURING" to actually work.
- -- CRAY - Add cray.conf parameter of SyncTimeout, maximum time to defer job
- scheduling if SLURM node or job state are out of synchronization with ALPS.
- -- If salloc was run as interactive, with job control, reset the foreground
- process group of the terminal to the process group of the parent pid before
- exiting. Patch from Don Albert, Bull.
- -- BGQ - set up the corner of a sub block correctly based on a relative
- position in the block instead of absolute.
- -- BGQ - make sure the recently added select_jobinfo of a step launch request
- isn't sent to the slurmd where environment variables would be overwritten
- incorrectly.
- * Changes in SLURM 2.3.0.rc1
- ============================
- -- NOTE THERE HAVE BEEN NEW FIELDS ADDED TO THE JOB AND PARTITION STATE SAVE
- FILES AND RPCS. PENDING AND RUNNING JOBS WILL BE LOST WHEN UPGRADING FROM
- EARLIER VERSION 2.3 PRE-RELEASES AND RPCS WILL NOT WORK WITH EARLIER
- VERSIONS.
- -- select/cray: Add support for Accelerator information including model and
- memory options.
- -- Cray systems: Add support to suspend/resume salloc command to insure that
- aprun does not get initiated when the job is suspended. Processes suspended
- and resumed are determined by using process group ID and parent process ID,
- so some processes may be missed. Since salloc runs as a normal user, its
- ability to identify processes associated with a job is limited.
- -- Cray systems: Modify smap and sview to display all nodes even if multiple
- nodes exist at each coordinate.
- -- Improve efficiency of select/linear plugin with topology/tree plugin
- configured, Patch by Andriy Grytsenko (Massive Solutions Limited).
- -- For front-end architectures on which job steps are run (emulated Cray and
- BlueGene systems only), fix bug that would free memory still in use.
- -- Add squeue support to display a job's license information. Patch by Andy
- Roosen (University of Delaware).
- -- Add flag to the select APIs for job suspend/resume indicating if the action
- is for gang scheduling or an explicit job suspend/resume by the user. Only
- an explicit job suspend/resume will reset the job's priority and make
- resources exclusively held by the job available to other jobs.
- -- Fix possible invalid memory reference in sched/backfill. Patch by Andriy
- Grytsenko (Massive Solutions Limited).
- -- Add select_jobinfo to the task launch RPC. Based upon patch by Andriy
- Grytsenko (Massive Solutions Limited).
- -- Add DefMemPerCPU/Node and MaxMemPerCPU/Node to partition configuration.
- This improves flexibility when gang scheduling only specific partitions.
- -- Added new enums to print out when a job is held by a QOS instead of an
- association limit.
- -- Enhancements to sched/backfill performance with select/cons_res plugin.
- Patch from Bjørn-Helge Mevik, University of Oslo.
- -- Correct job run time reported by smap for suspended jobs.
- -- Improve job preemption logic to avoid preempting more jobs than needed.
- -- Add contribs/arrayrun tool providing support for job arrays. Contributed by
- Bjørn-Helge Mevik, University of Oslo. NOTE: Not currently packaged as RPM
- and manual file editing is required.
- -- When suspending a job, wait 2 seconds instead of 1 second between sending
- SIGTSTP and SIGSTOP. Some MPI implementations were not stopping within the
- 1 second delay.
- -- Add support for managing devices based upon Linux cgroup container. Based
- upon patch by Yiannis Georgiou, Bull.
- -- Fix memory buffering bug if an AllowGroups parameter of a partition has 100
- or more users. Patch by Andriy Grytsenko (Massive Solutions Limited).
- -- Fix bug in generic resource tracking of gres associated with specific CPUs.
- Resources were being over-allocated.
- -- On systems with front-end nodes (IBM BlueGene and Cray) limit batch jobs to
- only one CPU of these shared resources.
- -- Set SLURM_MEM_PER_CPU or SLURM_MEM_PER_NODE environment variables for both
- interactive (salloc) and batch jobs if the job has a memory limit. For Cray
- systems also set CRAY_AUTO_APRUN_OPTIONS environment variable with the
- memory limit.
- -- Fix bug in select/cons_res task distribution logic when tasks-per-node=0.
- Patch from Rod Schultz, Bull.
- -- Restore node configuration information (CPUs, memory, etc.) for powered
- down nodes when slurmctld daemon restarts rather than waiting for the node to be
- restored to service and getting the information from the node (NOTE: Only
- relevant if FastSchedule=0).
- -- For Cray systems with the srun2aprun wrapper, rebuild the srun man page
- identifying the srun options which are valid on that system.
- -- BlueGene: Permit users to specify a separate connection type for each
- dimension (e.g. "--conn-type=torus,mesh,torus").
- -- Add the ability for a user to limit the number of leaf switches in a job's
- allocation using the --switch option of salloc, sbatch and srun. There is
- also a new SchedulerParameters value of max_switch_wait, which a SLURM
- administrator can use to set a maximum job delay and prevent a user job
- from blocking lower priority jobs for too long. Based on work by Rod
- Schultz, Bull.
- * Changes in SLURM 2.3.0.pre6
- =============================
- -- NOTE: THERE HAS BEEN A NEW FIELD ADDED TO THE CONFIGURATION RESPONSE RPC
- AS SHOWN BY "SCONTROL SHOW CONFIG". THIS FUNCTION WILL ONLY WORK WHEN THE
- SERVER AND CLIENT ARE BOTH RUNNING SLURM VERSION 2.3.0.pre6
- -- Modify job expansion logic to support licenses, generic resources, and
- currently running job steps.
- -- Added an rpath if using the --with-munge option of configure.
- -- Add support for multiple sets of DEFAULT node, partition, and frontend
- specifications in slurm.conf so that default values can be changed multiple
- times as the configuration file is read.
- -- BLUEGENE - Improved logic to place small blocks in free space before freeing
- larger blocks.
- -- Add optional argument to srun's --kill-on-bad-exit so that user can set
- its value to zero and override a SLURM configuration parameter of
- KillOnBadExit.
- -- Fix bug in GraceTime support for preempted jobs that prevented proper
- operation when more than one job was being preempted. Based on patch from
- Bill Brophy, Bull.
- -- Fix for running sview from a non-bluegene cluster to a bluegene cluster.
- Regression from pre5.
- -- If job's TMPDIR environment is not set or is not usable, reset to "/tmp".
- Patch from Andriy Grytsenko (Massive Solutions Limited).
- -- Remove logic for defunct RPC: DBD_GET_JOBS.
- -- Propagate DebugFlag changes by scontrol to the plugins.
- -- Improve accuracy of REQUEST_JOB_WILL_RUN start time with respect to higher
- priority pending jobs.
- -- Add -R/--reservation option to squeue command as a job filter.
- -- Add scancel support for --clusters option.
- -- Note that scontrol and sprio can only support a single cluster at one time.
- -- Add support to salloc for a new environment variable SALLOC_KILL_CMD.
- -- Add scontrol ability to increment or decrement a job or step time limit.
- -- Add support for SLURM_TIME_FORMAT environment variable to control time
- stamp output format. Work by Gerrit Renker, CSCS.
- -- Fix error handling in mvapich plugin that could cause srun to enter an
- infinite loop under rare circumstances.
- -- Add support for multiple task plugins. Patch from Andriy Grytsenko (Massive
- Solutions Limited).
- -- Addition of per-user node/cpu limits for QOS's. Patch from Aaron Knister,
- UMBC.
- -- Fix logic for multiple job resize operations.
- -- BLUEGENE - many fixes to make things work correctly on a L/P system.
- -- Fix bug in layout of job step with --nodelist option plus node count. Old
- code could allocate too few nodes.
- * Changes in SLURM 2.3.0.pre5
- =============================
- -- NOTE: THERE HAS BEEN A NEW FIELD ADDED TO THE JOB STATE FILE. UPGRADES FROM
- VERSION 2.3.0-PRE4 WILL RESULT IN LOST JOBS UNLESS THE "orig_dependency"
- FIELD IS REMOVED FROM JOB STATE SAVE/RESTORE LOGIC. ON CRAY SYSTEMS A NEW
- "confirm_cookie" FIELD WAS ADDED AND HAS THE SAME EFFECT OF DISABLING JOB
- STATE RESTORE.
- -- BLUEGENE - Improve speed of start up when removing blocks at the beginning.
- -- Correct init.d/slurm status to have non-zero exit code if ANY Slurm
- daemon that should be running on the node is not running. Patch from Rod
- Schulz, Bull.
- -- Improve accuracy of response to "srun --test-only jobid=#".
- -- Fix bug in front-end configurations which reports job_cnt_comp underflow
- errors after slurmctld restarts.
- -- Eliminate "error from _trigger_slurmctld_event in backup.c" due to lack of
- event triggers.
- -- Fix logic in BackupController to properly recover front-end node state and
- avoid purging active jobs.
- -- Added man pages to html pages and the new cpu_management.html page.
- Submitted by Martin Perry / Rod Schultz, Bull.
- -- Job dependency information will only show the currently active dependencies
- rather than the original dependencies. From Dan Rusak, Bull.
- -- Add RPCs to get the SPANK environment variables from the slurmctld daemon.
- Patch from Andrej N. Gritsenko.
- -- Updated plugins/task/cgroup/task_cgroup_cpuset.c to support newer
- HWLOC_API_VERSION.
- -- Do not build select/bluegene plugin if C++ compiler is not installed.
- -- Add new configure option --with-srun2aprun to build an srun command
- which is a wrapper over Cray's aprun command and supports many srun
- options. Without this option, the srun command will advise the user
- to use the aprun command.
- -- Change container ID supported by proctrack plugin from 32-bit to 64-bit.
- -- Added contribs/cray/libalps_test_programs.tar.gz with tools to validate
- SLURM's logic used to support Cray systems.
- -- Create RPM for srun command that is a wrapper for the Cray/ALPS aprun
- command. Dependent upon .rpmmacros parameter of "%_with_srun2aprun".
- -- Add configuration parameter MaxStepCount to limit effect of bad batch
- scripts.
- -- Moving to github
- -- Fix for handling a 2.3 system talking to a 2.2 slurmctld.
- -- Add contribs/lua/job_submit.license.lua script. Update job_submit and Lua
- related documentation.
- -- Test if _make_batch_script() is called with a NULL script.
- -- Increase hostlist support from 24k to 64k nodes.
- -- Renamed the Accounting Storage database's "DerivedExitString" job field to
- "Comment". Provided backward compatible support for "DerivedExitString" in
- the sacctmgr tool.
- -- Added the ability to save the job's comment field to the Accounting
- Storage db (to the formerly named, "DerivedExitString" job field). This
- behavior is enabled by a new slurm.conf parameter:
- AccountingStoreJobComment.
- -- Test if _make_batch_script() is called with a NULL script.
- -- Increase hostlist support from 24k to 64k nodes.
- -- Fix srun to handle signals correctly when waiting for a step creation.
- -- Preserve the last job ID across slurmctld daemon restarts even if the job
- state file can not be fully recovered.
- -- Made the hostlist functions be able to arbitrarily handle any size
- dimension no matter what the size of the cluster is in dimensions.
- * Changes in SLURM 2.3.0.pre4
- =============================
- -- Add GraceTime to Partition and QOS data structures. Preempted jobs will be
- given this time interval before termination. Work by Bill Brophy, Bull.
- -- Add the ability for scontrol and sview to modify slurmctld DebugFlags
- values.
- -- Various Cray-specific patches:
- - Fix a bug in distinguishing XT from XE.
- - Avoids problems with empty nodenames on Cray.
- - Check whether ALPS is hanging on to nodes, which happens if ALPS has not
- yet cleaned up the node partition.
- - Stops select/cray from clobbering node_ptr->reason.
- - Perform 'safe' release of ALPS reservations using inventory and apkill.
- - Compile-time sanity check for the apbasil and apkill files.
- - Changes error handling in do_basil_release() (called by
- select_g_job_fini()).
- - Warn that salloc --no-shell option is not supported on Cray systems.
- -- Add a reservation flag of "License_Only". If set, then jobs using the
- reservation may use the licenses associated with it plus any compute nodes.
- Otherwise the job is limited to the compute nodes associated with the
- reservation.
- -- Change slurm.conf node configuration parameter from "Procs" to "CPUs".
- Both parameters will be supported for now.
- -- BLUEGENE - fix for when user requests only midplane names with no count at
- job submission time to process the node count correctly.
- -- Fix job step resource allocation problem when both node and tasks counts
- are specified. New logic selects nodes with larger CPU counts as needed.
- -- BGQ - make it so srun wraps runjob (still under construction, but works
- for most cases)
- -- Permit a job's QOS and Comment field to both change in a single RPC. This
- was previously disabled since Moab stored the QOS within the Comment field.
- -- Add support for jobs to expand in size. Submit additional batch job with
- the option "--dependency=expand:<jobid>". See web page "faq.html#job_size"
- for details. Restrictions to be removed in the future.
- -- Added --with-alps-emulation to configure, and also an optional cray.conf
- to setup alps location and database information.
- -- Modify PMI data types from 16-bits to 32-bits in order to support MPICH2
- jobs with more than 65,536 tasks. Patch from Hongjia Cao, NUDT.
- -- Set slurmd's soft process CPU limit equal to its hard limit and notify the
- user if the limit is not infinite.
- -- Added proctrack/cgroup and task/cgroup plugins from Matthieu Hautreux, CEA.
- -- Fix slurmctld restart logic that could leave nodes in UNKNOWN state for a
- longer time than necessary after restart.
- * Changes in SLURM 2.3.0.pre3
- =============================
- -- BGQ - Appears to work correctly in emulation mode, no sub blocks just yet.
- -- Minor typos fixed
- -- Various bug fixes for Cray systems.
- -- Fix bug that when setting a compute node to idle state, it was failing to
- set the systems up_node_bitmap.
- -- BLUEGENE - code reorder
- -- BLUEGENE - Now only one select plugin for all Bluegene systems.
- -- Modify srun to set the SLURM_JOB_NAME environment variable when srun is
- used to create a new job allocation. Not set when srun is used to create a
- job step within an existing job allocation.
- -- Modify init.d/slurm script to start multiple slurmd daemons per compute
- node if so configured. Patch from Matthieu Hautreux, CEA.
- -- Change license data structure counters from uint16_t to uint32_t to support
- larger license counts.
- * Changes in SLURM 2.3.0.pre2
- =============================
- -- Log a job's requeue or cancellation due to preemption to that job's stderr:
- "*** JOB 65547 CANCELLED AT 2011-01-21T12:59:33 DUE TO PREEMPTION ***".
- -- Added new job termination state of JOB_PREEMPTED, "PR" or "PREEMPTED" to
- indicate job termination was due to preemption.
- -- Optimize advanced reservations resource selection for computer topology.
- The logic has been added to select/linear and select/cons_res, but will
- not be enabled until the other select plugins are modified.
- -- Remove checkpoint/xlch plugin.
- -- Disable deletion of partitions that have unfinished jobs (pending,
- running or suspended states). Patch from Martin Perry, BULL.
- -- In sview, disable the sorting of node records by name at startup for
- clusters over 1000 nodes. Users can enable this by selecting the "Name"
- tab. This change dramatically improves scalability of sview.
- -- Report error when trying to change a node's state from scontrol for Cray
- systems.
- -- Do not attempt to read the batch script for non-batch jobs. This patch
- eliminates some inappropriate error messages.
- -- Preserve NodeHostName when reordering nodes due to system topology.
- -- On Cray/ALPS systems do node inventory before scheduling jobs.
- -- Disable some salloc options on Cray systems.
- -- Disable scontrol's wait_job command on Cray systems.
- -- Disable srun command on native Cray/ALPS systems.
- -- Updated configure option "--enable-cray-emulation" (still under
- development) to emulate a cray XT/XE system, and auto-detect a real
- Cray XT/XE systems (removed no longer needed --enable-cray configure
- option). Building on native Cray systems requires the
- cray-MySQL-devel-enterprise rpm and expat XML parser library/headers.
- * Changes in SLURM 2.3.0.pre1
- =============================
- -- Added that when a slurmctld closes the connection to the database its
- registered host and port are removed.
- -- Added flag to slurmdbd.conf TrackSlurmctldDown where if set will mark idle
- resources as down on a cluster when a slurmctld disconnects or is no
- longer reachable.
- -- Added support for more than one front-end node to run slurmd on
- architectures where the slurmd does not execute on the compute nodes
- (e.g. BlueGene). New configuration parameters FrontendNode and FrontendAddr
- added. See "man slurm.conf" for more information.
- -- With the scontrol show job command when using the --details option, show
- a batch job's script.
- -- Add ability to create reservations or partitions and submit batch jobs
- using sview. Also add the ability to delete reservations and partitions.
- -- Added new configuration parameter MaxJobId. Once reached, restart job ID
- values at FirstJobId.
- -- When restarting slurmctld with priority/basic, increment all job priorities
- so the highest job priority becomes TOP_PRIORITY.
- * Changes in SLURM 2.2.8
- ========================
- -- Prevent background salloc disconnecting terminal at termination. Patch by
- Don Albert, Bull.
- -- Fixed issue where preempt mode is skipped when creating a QOS. Patch by
- Bill Brophy, Bull.
- -- Fixed documentation (html) for PriorityUsageResetPeriod to match that in the
- man pages. Patch by Nancy Kritkausky, Bull.
- * Changes in SLURM 2.2.7
- ========================
- -- Eliminate zombie process created if salloc exits with stopped child
- process. Patch from Gerrit Renker, CSCS.
- -- With default configuration on non-Cray systems, enable salloc to be
- spawned as a background process. Based upon work by Don Albert (Bull) and
- Gerrit Renker (CSCS).
- -- Fixed Regression from 2.2.4 in accounting where an inherited limit
- would not be set correctly in the added child association.
- -- Fixed issue with accounting when asking for jobs with a hostlist.
- -- Avoid clearing a node's Arch, OS, BootTime and SlurmdStartTime when
- "scontrol reconfig" is run. Patch from Martin Perry, Bull.
- * Changes in SLURM 2.2.6
- ========================
- -- Fix displaying of account coordinators with sacctmgr. Possibility to show
- deleted accounts. Only a cosmetic issue, since the accounts are already
- deleted, and have no associations.
- -- Prevent opaque ncurses WINDOW struct on OS X 10.6.
- -- Fix issue with accounting when using PrivateData=jobs... users would not be
- able to view their own jobs unless they were admin or coordinators which is
- obviously wrong.
- -- Fix bug in node stat if slurmctld is restarted while nodes are in the
- process of being powered up. Patch from Andriy Grytsenko.
- -- Change maximum batch script size from 128k to 4M.
- -- Get slurmd -f option working. Patch from Andriy Grytsenko.
- -- Fix for linking problem on OSX. Patches from Jon Bringhurst (LANL) and
- Tyler Strickland.
- -- Reset a job's priority to zero (suspended) when Moab requeues the job.
- Patch from Par Andersson, NSC.
- -- When enforcing accounting, fix polling for unknown uids for users after
- the slurmctld started. Previously one would have to issue a reconfigure
- to the slurmctld to have it look for new uids.
- -- BLUEGENE - if a block goes into an error state. Fix issue where accounting
- wasn't updated correctly when the block was resumed.
- -- Synchronize power-save module better with scheduler. Patch from
- Andriy Grytsenko (Massive Solutions Limited).
- -- Avoid SEGV in association logic with user=NULL. Patch from
- Andriy Grytsenko (Massive Solutions Limited).
- -- Fixed issue in accounting where it was possible for a new
- association/wckey to be set incorrectly as a default if the new object
- was added after an original default object already existed. Before
- the slurmctld would need to be restarted to fix the issue.
- -- Updated the Normalized Usage section in priority_multifactor.shtml.
- -- Disable use of SQUEUE_FORMAT env var if squeue -l, -o, or -s option is
- used. Patch from Aaron Knister (UMBC).
- * Changes in SLURM 2.2.5
- ========================
- -- Correct init.d/slurm status to have non-zero exit code if ANY Slurm
- daemon that should be running on the node is not running. Patch from Rod
- Schulz, Bull.
- -- Improve accuracy of response to "srun --test-only jobid=#".
- -- Correct logic to properly support --ntasks-per-node option in the
- select/cons_res plugin. Patch from Rod Schulz, Bull.
- -- Fix bug in select/cons_res with respect to generic resource (gres)
- scheduling which prevented some jobs from starting as soon as possible.
- -- Fix memory leak in select/cons_res when backfill scheduling generic
- resources (gres).
- -- Fix for when configuring a node with more resources than in real life
- and using task/affinity.
- -- Fix so slurmctld will pack correctly 2.1 step information. (Only needed if
- a 2.1 client is talking to a 2.2 slurmctld.)
- -- Set powered down node's state to IDLE+POWER after slurmctld restart instead
- of leaving in UNKNOWN+POWER. Patch from Andrej Gritsenko.
- -- Fix bug where srun's executable is not on its current search path, but
- can be found in the user's default search path. Modify slurmstepd to find
- the executable. Patch from Andrej Gritsenko.
- -- Make sview display correct cpu count for steps.
- -- BLUEGENE - when running in overlap mode make sure to check the connection
- type so you can create overlapping blocks on the exact same nodes with
- different connection types (i.e. one torus, one mesh).
- -- Fix memory leak if MPI ports are reserved (for OpenMPI) and srun's
- --resv-ports option is used.
- -- Fix some anomalies in select/cons_res task layout when using the
- --cpus-per-task option. Patch from Martin Perry, Bull.
- -- Improve backfill scheduling logic when job specifies --ntasks-per-node and
- --mem-per-cpu options on a heterogeneous cluster. Patch from Bjorn-Helge
- Mevik, University of Oslo.
- -- Print warning message if srun specifies --cpus-per-task larger than used
- to create job allocation.
- -- Fix issue when changing a user's name in accounting, if using wckeys would
- execute correctly, but bad memcopy would core the DBD. No information
- would be lost or corrupted, but you would need to restart the DBD.
- * Changes in SLURM 2.2.4
- ========================
- -- For batch jobs for which the Prolog fails, substitute the job ID for any
- "%j" in the job's output or error file specification.
- -- Add licenses field to the sview reservation information.
- -- BLUEGENE - Fix for handling extremely overloaded system on Dynamic system
- dealing with starting jobs on overlapping blocks. Previous fallout
- was job would be requeued. (happens very rarely)
- -- In accounting_storage/filetxt plugin, substitute spaces within job names,
- step names, and account names with an underscore to insure proper parsing.
- -- When building contribs/perlapi ignore both INSTALL_BASE and PERL_MM_OPT.
- Use PREFIX instead to avoid build errors from multiple installation
- specifications.
- -- Add job_submit/cnode plugin to support resource reservations of less than
- a full midplane on BlueGene computers. Treat cnodes as licenses which can
- be reserved and are consumed by jobs. This reservation mechanism for less
- than an entire midplane is still under development.
- -- Clear a job's "reason" field when a held job is released.
- -- When releasing a held job, calculate a new priority for it rather than
- just setting the priority to 1.
- -- Fix for sview started on a non-bluegene system to pick colors correctly
- when talking to a real bluegene system.
- -- Improve sched/backfill's expected start time calculation.
- -- Prevent abort of sacctmgr for dump command with invalid (or no) filename.
- -- Improve handling of job updates when using limits in accounting, and
- updating jobs as a non-admin user.
- -- Fix for "squeue --states=all" option. Bug would show no jobs.
- -- Schedule jobs with reservations before those without reservations.
- -- Fix squeue/scancel to query correctly against accounts of different case.
- -- Abort an srun command when its associated job gets aborted due to a
- dependency that can not be satisfied.
- -- In jobcomp plugins, report start time of zero if pending job is cancelled.
- Previously may report expected start time.
- -- Fixed sacctmgr man to state correct variables.
- -- Select nodes based upon their Weight when job allocation requests include
- a constraint field with a count (e.g. "srun --constraint=gpu*2 -N4 a.out").
- -- Add support for user names that are entirely numeric and do not treat them
- as UID values. Patch from Dennis Leepow.
- -- Patch to un/pack double values properly if negative value. Patch from
- Dennis Leepow
- -- Do not reset a job's priority when requeued or suspended.
- -- Fix problem that could let new jobs start on a node in DRAINED state.
- -- Fix cosmetic sacctmgr issue where if the user you are trying to add
- doesn't exist in the /etc/passwd file and the account you are trying
- to add them to doesn't exist it would print (null) instead of the bad
- account name.
- -- Fix associations/qos for when adding back a previously deleted object
- the object will be cleared of all old limits.
- -- BLUEGENE - Added back a lock when creating dynamic blocks to be more thread
- safe on larger systems with heavy load.
- * Changes in SLURM 2.2.3
- ========================
- -- Update srun, salloc, and sbatch man page description of --distribution
- option. Patches from Rod Schulz, Bull.
- -- Applied patch from Martin Perry to fix "Incorrect results for task/affinity
- block second distribution and cpus-per-task > 1" bug.
- -- Avoid setting a job's eligible time while held (priority == 0).
- -- Substantial performance improvement to backfill scheduling. Patch from
- Bjorn-Helge Mevik, University of Oslo.
- -- Make timeout for communications to the slurmctld be based upon the
- MessageTimeout configuration parameter rather than always 3 seconds.
- Patch from Matthieu Hautreux, CEA.
- -- Add new scontrol option of "show aliases" to report every NodeName that is
- associated with a given NodeHostName when running multiple slurmd daemons
- per compute node (typically used for testing purposes). Patch from
- Matthieu Hautreux, CEA.
- -- Fix for handling job names with a "'" in the name within MySQL accounting.
- Patch from Gerrit Renker, CSCS.
- -- Modify condition under which salloc execution delayed until moved to the
- foreground. Patch from Gerrit Renker, CSCS.
- Job control for interactive salloc sessions: only if ...
- a) input is from a terminal (stdin has valid termios attributes),
- b) controlling terminal exists (non-negative tpgid),
- c) salloc is not run in allocation-only (--no-shell) mode,
- d) salloc runs in its own process group (true in interactive
- shells that support job control),
- e) salloc has been configured at compile-time to support background
- execution and is not currently in the background process group.
- -- Abort salloc if no controlling terminal and --no-shell option is not used
- ("setsid salloc ..." is disabled). Patch from Gerrit Renker, CSCS.
- -- Fix to gang scheduling logic which could cause jobs to not be suspended
- or resumed when appropriate.
- -- Applied patch from Martin Perry to fix "Slurmd abort when using task
- affinity with plane distribution" bug.
- -- Applied patch from Yiannis Georgiou to fix "Problem with cpu binding to
- sockets option" behaviour. This change causes "--cpu_bind=sockets" to bind
- tasks only to the CPUs on each socket allocated to the job rather than all
- CPUs on each socket.
- -- Advance daily or weekly reservations immediately after termination to avoid
- having a job start that runs into the reservation when later advanced.
- -- Fix for enabling users to change their own default account, wckey, or QOS.
- -- BLUEGENE - If using OVERLAP mode fixed issue with multiple overlapping
- blocks in error mode.
- -- Fix for sacctmgr to display correctly default accounts.
- -- scancel -s SIGKILL will always send the RPC to the slurmctld rather than
- the slurmd daemon(s). This insures that tasks in the process of getting
- spawned are killed.
- -- BLUEGENE - If using OVERLAP mode fixed issue with jobs getting denied
- at submit if the only option for their job was overlapping a block in
- error state.
- * Changes in SLURM 2.2.2
- ========================
- -- Correct logic to set correct job hold state (admin or user) when setting
- the job's priority using scontrol's "update jobid=..." rather than its
- "hold" or "holdu" commands.
- -- Modify squeue to report unset --mincores, --minthreads or --extra-node-info
- values as "*" rather than 65534. Patch from Rod Schulz, BULL.
- -- Report the StartTime of a job as "Unknown" rather than the year 2106 if its
- expected start time was too far in the future for the backfill scheduler
- to compute.
- -- Prevent a pending job reason field from inappropriately being set to
- "Priority".
- -- In sched/backfill with jobs having QOS_FLAG_NO_RESERVE set, then don't
- consider the job's time limit when attempting to backfill schedule. The job
- will just be preempted as needed at any time.
- -- Eliminated a bug in sbatch when no valid target clusters are specified.
- -- When explicitly sending a signal to a job with the scancel command and that
- job is in a pending state, then send the request directly to the slurmctld
- daemon and do not attempt to send the request to slurmd daemons, which are
- not running the job anyway.
- -- In slurmctld, properly set the up_node_bitmap when setting a node's state to
- IDLE (in case the previous node state was DOWN).
- -- Fix smap to process block midplane names correctly when on a bluegene
- system.
- -- Fix smap to once again print out the Letter 'ID' for each line of a block/
- partition view.
- -- Corrected the NOTES section of the scancel man page
- -- Fix for accounting_storage/mysql plugin to correctly query cluster based
- transactions.
- -- Fix issue when updating database for clusters that were previously deleted
- before upgrade to 2.2 database.
- -- BLUEGENE - Handle mesh torus check better in dynamic mode.
- -- BLUEGENE - Fixed race condition when freeing block, most likely only would
- happen in emulation.
- -- Fix for calculating used QOS limits correctly on a slurmctld reconfig.
- -- BLUEGENE - Fix for bad conn-type set when running small blocks in HTC mode.
- -- If salloc's --no-shell option is used, then do not attempt to preserve the
- terminal's state.
- -- Add new SLURM configure time parameter of --disable-salloc-background. If
- set, then salloc can only execute in the foreground. If started in the
- background, then a message will be printed and the job allocation halted
- until brought into the foreground.
- NOTE: THIS IS A CHANGE IN DEFAULT SALLOC BEHAVIOR FROM V2.2.1, BUT IS
- CONSISTENT WITH V2.1 AND EARLIER.
- -- Added the Multi-Cluster Operation web page.
- -- Removed remnant code for enforcing max sockets/cores/threads in the
- cons_res plugin (see last item in 2.1.0-pre5). This was responsible
- for a bug reported by Rod Schultz.
- -- BLUEGENE - Set correct env vars for HTC mode on a P system to get correct
- block.
- -- Correct RunTime reported by "scontrol show job" for pending jobs.
- * Changes in SLURM 2.2.1
- ========================
- -- Fix setting derived exit code correctly for jobs that happen to have the
- same jobid.
- -- Better checking for time overflow when rolling up in accounting.
- -- Add scancel --reservation option to cancel all jobs associated with a
- specific reservation.
- -- Treat reservation with no nodes like one that starts later (let jobs of any
- size get queued and do not block any pending jobs).
- -- Fix bug in gang scheduling logic that would temporarily resume too many jobs
- after a job completed.
- -- Change srun message about job step being deferred due to SlurmctldProlog
- running to be more clear and only print when --verbose option is used.
- -- Made it so you could remove the hold on jobs with sview by setting the
- priority to infinite.
- -- BLUEGENE - better checking small blocks in dynamic mode whether a full
- midplane job could run or not.
- -- Decrease the maximum sleep time between srun job step creation retry
- attempts from 60 seconds to 29 seconds. This should eliminate a possible
- synchronization problem with gang scheduling that could result in job
- step creation requests only occurring when a job is suspended.
- -- Fix to prevent changing a held job's state from HELD to DEPENDENCY
- until the job is released. Patch from Rod Schultz, Bull.
- -- Fixed sprio -M to reflect PriorityWeight values from remote cluster.
- -- Fix bug in sview when trying to update arbitrary field on more than one
- job. Formerly would display information about one job, but update next
- selected job.
- -- Made it so QOS with UsageFactor set to 0 would make it so jobs running
- under that QOS wouldn't add time to fairshare or association/qos
- limits.
- -- Fixed issue where QOS priority wasn't re-normalized until a slurmctld
- restart when a QOS priority was changed.
- -- Fix sprio to use calculated numbers from slurmctld instead of
- calculating its own numbers.
- -- BLUEGENE - fixed race condition with preemption where if the wind blows the
- right way the slurmctld could lock up when preempting jobs to run others.
- -- BLUEGENE - fixed epilog to wait until MMCS job is totally complete before
- finishing.
- -- BLUEGENE - more robust checking for states when freeing blocks.
- -- Added correct files to the slurm.spec file for correct perl api rpm
- creation.
- -- Added flag "NoReserve" to a QOS to make it so all jobs are created equal
- within a QOS. So if larger, higher priority jobs are unable to run they
- don't prevent smaller jobs from running even if running the smaller
- jobs delay the start of the larger, higher priority jobs.
- -- BLUEGENE - Check preemptees one by one to preempt lower priority jobs first
- instead of first fit.
- -- In select/cons_res, correct handling of the option
- SelectTypeParameters=CR_ONE_TASK_PER_CORE.
- -- Fix for checking QOS to override partition limits, previously if not using
- QOS some limits would be overlooked.
- -- Fix bug which would terminate a job step if any of the nodes allocated to
- it were removed from the job's allocation. Now only the tasks on those
- nodes are terminated.
- -- Fixed issue where, when using a storage_accounting plugin directly without
- the slurmDBD, updates weren't always sent correctly to the slurmctld. Appears
- to be OS dependent. Reported by Fredrik Tegenfeldt.
- * Changes in SLURM 2.2.0
- ========================
- -- Change format of Duration field in "scontrol show reservation" output from
- an integer number of minutes to "[days-]hours:minutes:seconds".
- -- Add support for changing the reservation of pending or running jobs.
- -- On Cray systems only, salloc sends SIGKILL to spawned process group when
- job allocation is revoked. Patch from Gerrit Renker, CSCS.
- -- Fix for sacctmgr to work correctly when modifying user associations where
- all the associations contain a partition.
- -- Minor mods to salloc signal handling logic: forwards more signals and
- releases allocation on real-time signals. Patch from Gerrit Renker, CSCS.
- -- Add salloc logic to preserve tty attributes after abnormal exit. Patch
- from Mark Grondona, LLNL.
- -- BLUEGENE - Fix for issue in dynamic mode when trying to create a block
- overlapping a block with no job running on it but in configuring state.
- -- BLUEGENE - Speedup by skipping blocks that are deallocating for other jobs
- when starting overlapping jobs in dynamic mode.
- -- Fix for sacct --state to work correctly when not specifying a start time.
- -- Fix upgrade process in accounting from 2.1 for clusters named "cluster".
- -- Export more jobacct_common symbols needed for the slurm api on some systems.
- * Changes in SLURM 2.2.0.rc4
- ============================
- -- Correction in logic to spread out over time highly parallel messages to
- minimize lost messages. Affects slurmd epilog complete messages and PMI
- key-pair transmissions. Patch from Gerrit Renker, CSCS.
- -- Fixed issue where a system has unsent messages to the dbd in 2.1 and
- upgrades to 2.2. Messages are now processed correctly.
- -- Fixed issue where assoc_mgr cache wasn't always loaded correctly if the
- slurmdbd wasn't running when the slurmctld was started.
- -- Make sure on a pthread create in step launch that the error code is looked
- at. Improves fault-tolerance of slurmd.
- -- Fix setting up default acct/wckey when upgrading from 2.1 to 2.2.
- -- Fix issue with associations attached to a specific partition with no other
- association, and requesting a different partition.
- -- Added perlapi to the slurmdb to the slurm.spec.
- -- In sched/backfill, correct handling of CompleteWait parameter to avoid
- backfill scheduling while a job is completing. Patch from Gerrit Renker,
- CSCS.
- -- Send message back to user when trying to launch job on a compute node
- lacking that user ID. Patch from Hongjia Cao, NUDT.
- -- BLUEGENE - Fix it so 1 midplane clusters will run small block jobs.
- -- Add Command and WorkDir to the output of "scontrol show job" for job
- allocations created using srun (not just sbatch).
- -- Fixed sacctmgr to not add blank defaultqos' when doing a cluster dump.
- -- Correct processing of memory and disk space specifications in the salloc,
- sbatch, and srun commands to work properly with a suffix of "MB", "GB",
- etc. and not only with a single letter (e.g. "M", "G", etc.).
- -- Prevent nodes with suspended jobs from being powered down by SLURM.
- -- Normalized the way pidfile are created by the slurm daemons.
- -- Fixed modifying the root association to not read in its last value
- when clearing a limit being set.
- -- Revert some recent signal handling logic from salloc so that SIGHUP sent
- after the job allocation will properly release the allocation and cause
- salloc to exit.
- -- BLUEGENE - Fix for recreating a block in a ready state.
- -- Fix debug flags for incorrect logic when dealing with DEBUG_FLAG_WIKI.
- -- Report reservation's Nodes as a hostlist expression of all nodes rather
- than using "ALL".
- -- Fix reporting of nodes in BlueGene reservation (was reporting CPU count
- rather than cnode count in scontrol output for NodeCnt field).
- * Changes in SLURM 2.2.0.rc3
- ============================
- -- Modify sacctmgr command to accept plural versions of options (e.g. "Users"
- in addition to "User"). Patch from Don Albert, BULL.
- -- BLUEGENE - make it so reset of boot counter happens only on state change
- and not when a new job comes along.
- -- Modify srun and salloc signal handling so they can be interrupted while
- waiting for an allocation. This was broken in version 2.2.0.rc2.
- -- Fix NULL pointer reference in sview. Patch from Gerrit Renker, CSCS.
- -- Fix file descriptor leak in slurmstepd on spank_task_post_fork() failure.
- Patch from Gerrit Renker, CSCS.
- -- Fix bug in preserving job state information when upgrading from SLURM
- version 2.1. Bug introduced in version 2.2.0-pre10. Patch from Par
- Andersson, NSC.
- -- Fix bug where if using the slurmdbd if a job wasn't able to start right
- away some accounting information may be lost.
- -- BLUEGENE - when a prolog failure happens the offending block is put in
- an error state.
- -- Changed the last column heading of the sshare output from "FS Usage" to
- "FairShare" and added more detail to the sshare man page.
- -- Fix bug in enforcement of reservation by account name. Used wrong index
- into an array. Patch from Gerrit Renker, CSCS.
- -- Modify job_submit/lua plugin to treat any non-zero return code from the
- job_submit and job_modify functions as an error and the user request should
- be aborted.
- -- Fix bug which would permit pending job to be started on completing node
- when job preemption is configured.
- * Changes in SLURM 2.2.0.rc2
- ============================
- -- Fix memory leak in job step allocation logic. Patch from Hongjia Cao, NUDT.
- -- If a preempted job was submitted with the --no-requeue option then cancel
- rather than requeue it.
- -- Fix for problems when adding a user for the first time to a new cluster
- with a 2.1 sacctmgr without specifying a default account.
- -- Resend TERMINATE_JOB message only to nodes that the job still has not
- terminated on. Patch from Hongjia Cao, NUDT.
- -- Treat time limit specification of "0:300" as a request for 300 seconds
- (5 minutes) instead of one minute.
- -- Modify sched/backfill plugin logic to continue working its way down the
- queue of jobs rather than restarting at the top if there are no changes in
- job, node, or partition state between runs. Patch from Hongjia Cao, NUDT.
- -- Improve scalability of select/cons_res logic. Patch from Matthieu Hautreux,
- CEA.
- -- Fix for possible deadlock in the slurmstepd when cancelling a job that is
- also writing a large amount of data to stderr.
- -- Fix in select/cons_res to eliminate "mem underflow" error when the
- slurmctld is reconfigured while a job is in completing state.
- -- Send a message to a user's job when its real or virtual memory limit
- is exceeded.
- -- Apply rlimits right before execing the users task so to lower the risk of
- the task exiting because the slurmstepd ran over a limit (log file size,
- etc.)
- -- Add scontrol command of "uhold <job_id>" so that an administrator can hold
- a job and let the job's owner release it. The scontrol command of
- "hold <job_id>" when executed by a SLURM administrator can only be released
- by a SLURM administrator and not the job owner.
- -- Change atoi to slurm_atoul in mysql plugin, needed for running on 32-bit
- systems in some cases.
- -- If a batch job is found to be missing from a node, make its termination
- state be NODE_FAIL rather than CANCELLED.
- -- Fatal error put back if running a bluegene or cray plugin from a controller
- not of that type.
- -- Make sure jobacct_gather plugin is not shutdown before messing with the
- process list.
- -- Modify signal handling in srun and salloc commands to avoid deadlock if the
- malloc function is interrupted and called again. The malloc function is
- thread safe, but not reentrant, which is a problem when signal handling if
- the malloc function itself has a lock. Problem fixed by moving signal
- handling in those commands to a new pthread.
- -- In srun set job abort flag on completion to handle the case when a user
- cancels a job while the node is not responding but slurmctld has not yet
- marked the node down. Patch from Hongjia Cao, NUDT.
- -- Streamline the PMI logic if no duplicate keys are included in the key-pairs
- managed. Substantially improves performance for large numbers of tasks.
- Adds support for SLURM_PMI_KVS_NO_DUP_KEYS environment variable. Patch
- from Hongjia Cao, NUDT.
- -- Fix issues with sview dealing with older versions of sview and saving
- defaults.
- -- Remove references to --mincores, --minsockets, and --minthreads from the
- salloc, sbatch and srun man pages. These options are defunct. Patch from
- Rod Schultz, Bull.
- -- Made openssl not be required to build RPMs, it is not required anymore
- since munge is the default crypto plugin.
- -- sacctmgr now has smarts to figure out if a qos is a default qos when
- modifying a user/acct or removing a qos.
- -- For reservations on BlueGene systems, set and report c-node counts rather
- than midplane counts.
- * Changes in SLURM 2.2.0.rc1
- ============================
- -- Add show_flags parameter to the slurm_load_block_info() function.
- -- perlapi has been brought up to speed courtesy of Hongjia Cao. (make sure to
- run 'make clean' if building in a different dir than source)
- -- Fixed regression in pre12 in crypto/munge when running with
- --enable-multiple-slurmd which would cause the slurmd's to core.
- -- Fixed regression where cpu count wasn't figured out correctly for steps.
- -- Fixed issue when using old mysql that can't handle a '.' in the table
- name.
- -- Mysql plugin works correctly without the SlurmDBD
- -- Added ability to query batch step with sstat. Currently no accounting data
- is stored for the batch step, but the internals are in place if we decide to
- do that in the future.
- -- Fixed some backwards compatibility issues with 2.2 talking to 2.1.
- -- Fixed regression where modifying associations didn't get sent to the
- slurmctld.
- -- Made sshare sort things the same way sacctmgr list assoc does
- (alphabetically)
- -- Fixed issue with default accounts being set up correctly.
- -- Changed sorting in the slurmctld so sshare output is similar to that of
- sacctmgr list assoc.
- -- Modify reservation logic so that daily and weekly reservations maintain
- the same time when daylight savings time starts or ends in the interim.
- -- Edit to make reservations handle updates to associations.
- -- Added the derived exit code to the slurmctld job record and the derived
- exit code and string to the job record in the SLURM db.
- -- Added slurm-sjobexit RPM for SLURM job exit code management tools.
- -- Added ability to use sstat/sacct against the batch step.
- -- Added OnlyDefaults option to sacctmgr list associations.
- -- Modified the fairshare priority formula to F = 2**(-Ue/S)
- -- Modify the PMI functions key-pair exchange function to support a 32-bit
- counter for larger job sizes. Patch from Hongjia Cao, NUDT.
- -- In sched/builtin - Make the estimated job start time logic faster (borrowed
- new logic from sched/backfill and added pthread) and more accurate.
- -- In select/cons_res fix bug that could result in a job being allocated zero
- CPUs on some nodes. Patch from Hongjia Cao, NUDT.
- -- Fix bug in sched/backfill that could set expected start time of a job too
- far in the future.
- -- Added ability to enforce new limits given to associations/qos on
- pending jobs.
- -- Increase max message size for the slurmdbd from 1000000 to 16*1024*1024
- -- Increase number of active threads in the slurmdbd from 50 to 100
- -- Fixed small bug in src/common/slurmdb_defs.c reported by Bjorn-Helge Mevik
- -- Fixed sacctmgr's ability to query associations against qos again.
- -- Fixed sview show config on non-bluegene systems.
- -- Fixed bug in selecting jobs based on sacct -N option
- -- Fix bug that prevented job Epilog from running more than once on a node if
- a job was requeued and started no job steps.
- -- Fixed issue where node index wasn't stored correctly when using DBD.
- -- Enable srun's use of the --nodes option with --exclusive (previously the
- --nodes option was ignored).
- -- Added UsageThreshold and Flags to the QOS object.
- -- Patch to improve threadsafeness in the mysql plugins.
- -- Add support for fair-share scheduling to be based upon resource use at
- the level of bank accounts and ignore use of individual users. Patch by
- Par Andersson, National Supercomputer Centre, Sweden.
- * Changes in SLURM 2.2.0.pre12
- ==============================
- -- Log if Prolog or Epilog run for longer than MessageTimeout / 2.
- -- Log the RPC number associated with messages from slurmctld that timeout.
- -- Fix bug in select/cons_res logic when job allocation includes --overcommit
- and --ntasks-per-node options and the node has fewer CPUs than the count
- specified by --ntasks-per-node.
- -- Fix bug in gang scheduling and job preemption logic so that preempted jobs
- get resumed properly after a slurmctld hot-start.
- -- Fix bug in select/linear handling of gang scheduled jobs that could result
- in run_job_cnt underflow error message.
- -- Fix bug in gang scheduling logic to properly support partitions added
- using the scontrol command.
- -- Fix a segmentation fault in sview where the 'excluded_partitions' field
- was set to NULL, caused by the absence of ~/.slurm/sviewrc.
- -- Rewrote some calls to is_user_any_coord() in src/plugins/accounting_storage
- modules to make use of is_user_any_coord()'s return value.
- -- Add configure option of --with-dimensions=#.
- -- Modify srun ping logic so that srun would only be considered not responsive
- if three ping messages were not responded to. Patch from Hongjia Cao (NUDT).
- -- Preserve a node's ReasonTime field after scontrol reconfig command. Patch
- from Hongjia Cao (NUDT).
- -- Added the authority for users with AdminLevel's defined in the SLURM db
- (Operators and Admins) and account coordinators to invoke commands that
- affect jobs, reservations, nodes, etc.
- -- Fix for slurmd restart on completing node with no tasks to get the correct
- state, completing. Patch from Hongjia Cao (NUDT).
- -- Prevent scontrol setting a node's Reason="". Patch from Hongjia Cao (NUDT).
- -- Add new functions hostlist_ranged_string_malloc,
- hostlist_ranged_string_xmalloc, hostlist_deranged_string_malloc, and
- hostlist_deranged_string_xmalloc which will allocate memory as needed.
- -- Make the slurm commands support both the --cluster and --clusters option.
- Previously, some commands support one of those options, but not the other.
- -- Fix bug when resizing a job that has steps running on some of those nodes.
- Avoid killing the job step on remaining nodes. Patch from Rod Schultz
- (BULL). Also fix bug related to tracking the CPUs allocated to job steps
- on each node after releasing some nodes from the job's allocation.
- -- Applied patch from Rod Schultz / Matthieu Hautreux to keep the Node-to-Host
- cache from becoming corrupted when a hostname cannot be resolved.
- -- Export more symbols in libslurm for job and node state information
- translation (numbers to strings). Patch from Hongjia Cao, NUDT.
- -- Add logic to retry sending RESPONSE_LAUNCH_TASKS messages from slurmd to
- srun. Patch from Hongjia Cao, NUDT.
- -- Modify bit_unfmt_hexmask() and bit_unfmt_binmask() functions to clear the
- bitmap input before setting the bits indicated in the input string.
- -- Add SchedulerParameters option of bf_window to control how far into the
- future that the backfill scheduler will look when considering jobs to start.
- The default value is one day. See "man slurm.conf" for details.
- -- Fix bug that can result in duplicate job termination records in accounting
- for job termination when slurmctld restarts or reconfigures.
- -- Modify plugin and library logic as needed to support use of the function
- slurm_job_step_stat() from user commands.
- -- Fix race condition in which PrologSlurmctld failure could cause slurmctld
- to abort.
- -- Fix bug preventing users in secondary user groups from being granted access
- to partitions configured with AllowGroups.
- -- Added support for a default account and wckey per cluster within accounting.
- -- Modified select/cons_res plugin so that if MaxMemPerCPU is configured and a
- job specifies its memory requirement, then more CPUs than requested will
- automatically be allocated to a job to honor the MaxMemPerCPU parameter.
- -- Added the derived_ec (exit_code) member to job_info_t. exit_code captures
- the exit code of the job script (or salloc) while derived_ec contains the
- highest exit code of all the job steps.
- -- Added SLURM_JOB_EXIT_CODE and SLURM_JOB_DERIVED_EC variables to the
- EpilogSlurmctld environment
- -- More work done on the accounting_storage/pgsql plugin, still beta.
- Patch from Hongjia Cao (NUDT).
- -- Major updates to sview from Dan Rusak (Bull), including:
- - Persistent option selections for each tab page
- - Clean up topology in grids
- - Leverage AllowGroups and Hidden options
- - Cascade full-info popups for ease of selection
- -- Add locks around the MySQL calls for proper operation if the non-thread
- safe version of the MySQL library is used.
- -- Remove libslurm.a, libpmi.a and libslurmdb.a from SLURM RPM. These static
- libraries are not generally usable.
- -- Fixed bug in sacctmgr when zeroing raw usage reported by Gerrit Renker.
- * Changes in SLURM 2.2.0.pre11
- ==============================
- -- Permit a regular user to change the partition of a pending job.
- -- Major re-write of the job_submit/lua plugin to pass pointers to available
- partitions and use lua metatables to reference the job and partition fields.
- -- Add support for several new trigger types: SlurmDBD failure/restart,
- Database failure/restart, Slurmctld failure/restart.
- -- Add support for SLURM_CLUSTERS environment variable in the sbatch, sinfo,
- squeue commands.
- -- Modify the sinfo and squeue commands to report state of multiple clusters
- if the --clusters option is used.
- -- Added printf __attribute__ qualifiers to info, debug, ... to help prevent
- bad/incorrect parameters being sent to them. Original patch from
- Eygene Ryabinkin (Russian Research Centre).
- -- Fix bug in slurmctld job completion logic when nodes allocated to a
- completing job are re-booted. Patch from Hongjia Cao (NUDT).
- -- In slurmctld's node record data structure, rename "hilbert_integer" to
- "node_rank".
- -- Add topology/node_rank plugin to sort nodes based upon rank loaded from
- BASIL on Cray computers.
- -- Fix memory leak in the auth/munge and crypto/munge plugins in the case of
- some failure modes.
- * Changes in SLURM 2.2.0.pre10
- ==============================
- -- Fix issue where, with EnforcePartLimits=yes in slurm.conf, any job for
- which no nodecnt was specified would be seen to have maxnodes=0, which
- would not allow the job to run.
- -- Fix issue where if not suspending a job the gang scheduler does the correct
- kill procedure.
- -- Fixed some issues when dealing with jobs from a 2.1 system so they live
- after an upgrade.
- -- In srun, log if --cpu_bind options are specified, but not supported by the
- current system configuration.
- -- Various patches from Hongjia Cao dealing with bugs found in sacctmgr and
- the slurmdbd.
- -- Fix bug in changing the nodes allocated to a running job and some node
- names specified are invalid, avoid invalid memory reference.
- -- Fixed filename substitution of %h and %n based on patch from Ralph Bean
- -- Added better job sorting logic when preempting jobs with qos.
- -- Log the IP address and port number for some communication errors.
- -- Fix bug in select/cons_res when --cpus_per_task option is used, could
- oversubscribe resources.
- -- In srun, do not implicitly set the job's maximum node count based upon a
- required hostlist.
- -- Avoid running the HealthCheckProgram on non-responding nodes rather than
- DOWN nodes.
- -- Fix bug in handling of poll() functions on OS X (SLURM was ignoring POLLIN
- if POLLHUP flag was set at the same time).
- -- Pulled Cray logic out of common/node_select.c into its own
- select/cray plugin. cons_res is the default. To use linear add 'Linear' to
- SelectTypeParameters.
- -- Fixed bug where resizing jobs didn't set used limits correctly.
- -- Change sched/backfill default time interval to 30 seconds and defer attempt
- to backfill schedule if slurmctld has more than 5 active RPCs. General
- improvements in logic scalability.
- -- Add SchedulerParameters option of default_sched_depth=# to control how
- many jobs on queue should be tested for attempted scheduling when a job
- completes or other routine events. Default value is 100 jobs. The full job
- queue is tested on a less frequent basis. This option can dramatically
- improve performance on systems with thousands of queued jobs.
- -- Gres/gpu now sets the CUDA_VISIBLE_DEVICES environment to control which
- GPU devices should be used for each job or job step and CUDA version 3.1+
- is used. NOTE: SLURM's generic resource support is still under development.
- -- Modify select/cons_res to pack jobs onto allocated nodes differently and
- minimize system fragmentation. For example on nodes with 8 CPUs each, a
- job needing 10 CPUs will now ideally be allocated 8 CPUs on one node and
- 2 CPUs on another node. Previously the job would have ideally been
- allocated 5 CPUs on each node, fragmenting the unused resources more.
- -- Modified the behavior of update_job() in job_mgr.c to return when the first
- error is encountered instead of continuing with more job updates.
- -- Removed all references to the following slurm.conf parameters, all of which
- have been removed or replaced since version 2.0 or earlier: HashBase,
- HeartbeatInterval, JobAcctFrequency, JobAcctLogFile (instead use
- AccountingStorageLoc), JobAcctType, KillTree, MaxMemPerTask, and
- MpichGmDirectSupport.
- -- Fix bug in slurmctld restart logic that improperly reported jobs had
- invalid features: "Job 65537 has invalid feature list: fat".
- -- BLUEGENE - Removed thread pool for destroying blocks. It turns out the
- memory leak we were concerned about for creating and destroying threads
- in a plugin doesn't exist anymore. This increases throughput dramatically,
- allowing multiple jobs to start at the same time.
- -- BLUEGENE - Removed thread pool for starting and stopping jobs. For similar
- reasons as noted above.
- -- BLUEGENE - Handle blocks that never deallocate.
- * Changes in SLURM 2.2.0.pre9
- =============================
- -- sbatch can now submit jobs to multiple clusters and run on the earliest
- available.
- -- Fix bug introduced in pre8 that prevented job dependencies and job
- triggers from working without the --enable-debug configure option.
- -- Replaced slurm_addr with slurm_addr_t
- -- Replaced slurm_fd with slurm_fd_t
- -- Skeleton code added for BlueGeneQ.
- -- Jobs can now be submitted to multiple partitions (job queues) and use the
- one permitting earliest start time.
- -- Change slurmdb_coord_table back to acct_coord_table to keep consistent
- with < 2.1.
- -- Introduced locking system similar to that in the slurmctld for the
- assoc_mgr.
- -- Added ability to change a users name in accounting.
- -- Restore squeue support for "%G" format (group id) accidentally removed in
- 2.2.0.pre7.
- -- Added preempt_mode option to QOS.
- -- Added a grouping=individual for sreport size reports.
- -- Added remove_qos logic to jobs running under a QOS that was removed.
- -- scancel now exits with a 1 if any job is non-existent when canceling.
- -- Better handling of select plugins that don't exist on various systems for
- cross cluster communication. Slurmctld, slurmd, and slurmstepd now only
- load the default select plugin as well.
- -- Better error handling when loading plugins.
- -- Prevent scontrol from aborting if getlogin() returns NULL.
- -- Prevent scontrol segfault when there are hidden nodes.
- -- Prevent srun segfault after task launch failure.
- -- Added job_submit/lua plugin.
- -- Fixed sinfo on a bluegene system to print correctly the output for:
- sinfo -e -o "%9P %6m %.4c %.22F %f"
- -- Add scontrol commands "hold" and "release" to simplify setting a job's
- priority to 0 or 1. Also tests that the job is in pending state.
- -- Increase maximum node list size (for incoming RPC) from 1024 bytes to 64k.
- -- In the backup slurmctld, purge triggers before recovering trigger state to
- avoid duplicate entries.
- -- Fix bug in sacct processing of --fields= option.
- -- Fix bug in checkpoint/blcr for jobs spanning multiple nodes introduced when
- changing some variable names in version 2.2.0.pre5.
- -- Removed the vestigial set_max_cluster_usage() function from the Priority
- Plugin API.
- -- Modify the output of "scontrol show job" for the field ReqS:C:T=. Fields
- not specified by the user will be reported as "*" instead of 65534.
- -- Added DefaultQOS option for an association.
- -- BLUEGENE - Added -B option to the slurmctld to clear created blocks from
- the system on start.
- -- BLUEGENE - Added option to scontrol & sview to recreate existing blocks.
- -- Fixed flags for returning messages to use the correct munge key when going
- cross-cluster.
- -- BLUEGENE - Added option to scontrol & sview to resume blocks in an error
- state instead of just freeing them.
- -- sview patched to allow multiple row selection of jobs, patch from Dan Rusak
- -- Lower default slurmctld server thread count from 1024 to 256. Some systems
- process threads on a last-in first-out basis and the high thread count was
- causing unexpectedly high delays for some RPCs.
- -- Added to sacctmgr the ability for admins to reset the raw usage of a user
- or account
- -- Improved the efficiency of a few lines in sacctmgr
- * Changes in SLURM 2.2.0.pre8
- =============================
- -- Add DebugFlags parameter of "Backfill" for sched/backfill detailed logging.
- -- Add DebugFlags parameter of "Gang" for detailed logging of gang scheduling
- activities.
- -- Add DebugFlags parameter of "Priority" for detailed logging of priority
- multifactor activities.
- -- Add DebugFlags parameter of "Reservation" for detailed logging of advanced
- reservations.
- -- Add run time to mail message upon job termination and queue time for mail
- message upon job begin.
- -- Add email notification option for job requeue.
- -- Generate a fatal error if the srun --relative option is used when not
- within an existing job allocation.
- -- Modify the meaning of InactiveLimit slightly. It will now cancel the job
- allocation created using the salloc or srun command if those commands
- cease responding for the InactiveLimit regardless of any running job steps.
- This parameter will no longer affect jobs spawned using sbatch.
- -- Remove AccountingStoragePass and JobCompPass from configuration RPC and
- scontrol show config command output. The use of SlurmDBD is still strongly
- recommended as SLURM will have limited database functionality or protection
- otherwise.
- -- Add sbatch options of --export and SBATCH_EXPORT to control which
- environment variables (if any) get propagated to the spawned job. This is
- particularly important for jobs that are submitted on one cluster and run
- on a different cluster.
- -- Fix bug in select/linear when used with gang scheduling and there are
- preempted jobs at the time slurmctld restarts that can result in over-
- subscribing resources.
- -- Added keeping track of the qos a job is running with in accounting.
- -- Fix for handling correctly jobs that resize, and also reporting correct
- stats on a job after it finishes.
- -- Modify gang scheduler so with SelectTypeParameter=CR_CPUS and task
- affinity is enabled, keep track of the individual CPUs allocated to jobs
- rather than just the count of CPUs allocated (which could overcommit
- specific CPUs for running jobs).
- -- Modify select/linear plugin data structures to eliminate underflow errors
- for the exclusive_cnt and tot_job_cnt variables (previously happened when
- slurmctld reconfigured while the job was in completing state).
- -- Change slurmd's working directory (and location of core files) to match
- that of the slurmctld daemon: the same directory used for log files,
- SlurmdLogFile (if specified with an absolute pathname) otherwise the
- directory used to save state, SlurmdSpoolDir.
- -- Add sattach support for the --pty option.
- -- Modify slurmctld communications logic to accept incoming messages on more
- than one port for improved scalability.
- -- Add SchedulerParameters option of "defer" to avoid trying to schedule a
- job at submission time, but to attempt scheduling many jobs at once for
- improved performance under heavy load.
- -- Correct logic controlling slurmctld thread limit eliminating check of
- RLIMIT_STACK.
- -- Make slurmctld's trigger logic more robust in the event that job records
- get purged before their trigger can be processed (e.g. MinJobAge=1).
- -- Add support for users to hold/release their own jobs (submit the job with
- srun/sbatch --hold/-H option or use "scontrol update jobid=# priority=0"
- to hold and "scontrol update jobid=# priority=1" to release).
- -- Added ability for sacct to query jobs by qos and a range of timelimits.
- -- Added ability for sstat to query pids of steps running.
- -- Support time specification in UTS format with a prefix of "uts" (e.g.
- "sbatch --begin=uts458389988 my.script").
- * Changes in SLURM 2.2.0.pre7
- =============================
- -- Fixed issue with sacctmgr if querying against non-existent cluster it
- works the same way as 2.1.
- -- Added infrastructure to support allocation of generic node resources (gres).
- -Modified select/linear and select/cons_res plugins to allocate resources
- at the level of a job without oversubscription.
- -Get sched/backfill operating with gres allocations.
- -Get gres configuration changes (reconfiguration) working.
- -Have job steps allocate resources.
- -Modified job step credential to include the job's and step's gres
- allocation details.
- -Integrate with HWLOC library to identify GPUs and NICs configured on each
- node.
- -- SLURM commands (squeue, sinfo, etc...) can now go cross-cluster on like
- linux systems. Cross-cluster for bluegene to linux and such should
- work fine, even sview.
- -- Added the ability to configure PreemptMode on a per-partition basis.
- -- Change slurmctld's default thread limit count to 1024, but adjust that down
- as needed based upon the process's resource limits.
- -- Removed the non-functional "SystemCPU" and "TotalCPU" reporting fields from
- sstat and updated man page
- -- Correct location of apbasil command on Cray XT systems.
- -- Fixed bug in MinCPU and AveCPU calculations in sstat command
- -- Send message to srun when the Prolog takes too long (MessageTimeout) to
- complete.
- -- Change timeout for socket connect() to be half of configured MessageTimeout.
- -- Added high-throughput computing web page with configuration guidance.
- -- Use more srun sockets to process incoming PMI (MPICH2) connections for
- better scalability.
- -- Added DebugFlags for the select/bluegene plugin: DEBUG_FLAG_BG_PICK,
- DEBUG_FLAG_BG_WIRES, DEBUG_FLAG_BG_ALGO, and DEBUG_FLAG_BG_ALGO_DEEP.
- -- Remove vestigial job record field "kill_on_step_done" (internal to the
- slurmctld daemon only).
- -- For MPICH2 jobs: Clear PMI state between job steps.
- * Changes in SLURM 2.2.0.pre6
- =============================
- -- sview - added ability to see database configuration.
- -- sview - added ability to add/remove visible tabs.
- -- sview - change way grid highlighting takes place on selected objects.
- -- Added infrastructure to support allocation of generic node resources.
- -Added node configuration parameter of Gres=.
- -Added ability to view/modify a node's gres using scontrol, sinfo and sview.
- -Added salloc, sbatch and srun --gres option.
- -Added ability to view a job or job step's gres using scontrol, squeue and
- sview.
- -Added new configuration parameter GresPlugins to define plugins used to
- manage generic resources.
- -Added framework for gres plugins.
- -Added DebugFlags option of "gres" for detailed debugging of gres actions.
- -- Slurmd modified to log slow slurmstepd startup and note possible file system
- problem.
- -- sview - There is now a .slurm/sviewrc created when running sview.
- Defaults are put in there as to how sview looks when first launched.
- You can set these by Ctrl-S or Options->Set Default Settings.
- -- Add scontrol "wait_job <job_id>" option to wait for nodes to boot as needed.
- Useful for batch jobs (in Prolog, PrologSlurmctld or the script) if powering
- down idle nodes.
- -- Added salloc and sbatch option --wait-all-nodes. If set non-zero, job
- initiation will be delayed until all allocated nodes have booted. Salloc
- will log the delay with the messages "Waiting for nodes to boot" and "Nodes
- are ready for job".
- -- The Priority/multifactor plugin now takes into consideration size of job
- in cpus as well as size in nodes when looking at the job size factor.
- Previously only nodes were considered.
- -- When using the SlurmDBD messages waiting to be sent will be combined
- and sent in one message.
- -- Remove srun's --core option. Move the logic to an optional SPANK plugin
- (currently in the contribs directory, but plan to distribute through
- http://code.google.com/p/slurm-spank-plugins/).
- -- Patch for adding CR_CORE_DEFAULT_DIST_BLOCK as a select option to layout
- jobs using block layout across cores within each node instead of cyclic
- which was previously the default.
- -- Accounting - When removing associations if jobs are running, those jobs
- must be killed before proceeding. Before the jobs were killed
- automatically thus causing user confusion on what is most likely an
- admin's mistake.
- -- sview - color column keeps reference color when highlighting.
- -- Configuration parameter MaxJobCount changed from 16-bit to 32-bit field.
- The default MaxJobCount was changed from 5,000 to 10,000.
- -- SLURM commands (squeue, sinfo, etc...) can now go cross-cluster on like
- linux systems. Cross-cluster for bluegene to linux and such does not
- currently work. You can submit jobs with sbatch. Salloc and srun are not
- cross-cluster compatible, and given their nature to talk to actual compute
- nodes these will likely never be.
- -- salloc modified to forward SIGTERM to the spawned program.
- -- In sched/wiki2 (for Moab support) - Add GRES and WCKEY fields to MODIFYJOBS
- and GETJOBS commands. Add GRES field to GETNODES command.
- -- In struct job_descriptor and struct job_info: rename min_sockets to
- sockets_per_node, min_cores to cores_per_socket, and min_threads to
- threads_per_core (the values are not minimum, but represent the target
- values).
- -- Fixed bug in clearing a partition's DisableRootJobs value reported by
- Hongjia Cao.
- -- Purge (or ignore) terminated jobs in a more timely fashion based upon the
- MinJobAge configuration parameter. Small values for MinJobAge should improve
- responsiveness for high job throughput.
- * Changes in SLURM 2.2.0.pre5
- =============================
- -- Modify commands to accept time format with one or two digit hour value
- (e.g. 8:00 or 08:00 or 8:00:00 or 08:00:00).
- -- Modify time parsing logic to accept "minute", "hour", "day", and "week" in
- addition to the currently accepted "minutes", "hours", etc.
- -- Add slurmd option of "-C" to print actual hardware configuration and exit.
- -- Pass EnforcePartLimits configuration parameter from slurmctld for user
- commands to see the correct value instead of always "NO".
- -- Modify partition data structures to replace the default_part,
- disable_root_jobs, hidden and root_only fields with a single field called
- "flags" populated with the flags PART_FLAG_DEFAULT, PART_FLAG_NO_ROOT,
- PART_FLAG_HIDDEN and/or PART_FLAG_ROOT_ONLY. This is a more flexible
- solution besides making for smaller data structures.
- -- Add node state flag of JOB_RESIZING. This will only exist when a job's
- accounting record is being written immediately before or after it changes
- size. This permits job accounting records to be written for a job at each
- size.
- -- Make calls to jobcomp and accounting_storage plugins before and after a job
- changes size (with the job state being JOB_RESIZING). All plugins write a
- record for the job at each size with intermediate job states being
- JOB_RESIZING.
- -- When changing a job size using scontrol, generate a script that can be
- executed by the user to reset SLURM environment variables.
- -- Modify select/linear and select/cons_res to use resources released by job
- resizing.
- -- Added to contribs foundation for Perl extension for slurmdb library.
- -- Add new configuration parameter JobSubmitPlugins which provides a mechanism
- to set default job parameters or perform other site-configurable actions at
- job submit time.
- -- Better postgres support for accounting, still beta.
- -- Speed up job start when using the slurmdbd.
- -- Forward step failure reason back to slurmd. Before, in some cases, it
- would just be SLURM_FAILURE returned.
- -- Changed squeue to fail when passed invalid -o <output_format> or
- -S <sort_list> specifications.
- * Changes in SLURM 2.2.0.pre4
- =============================
- -- Add support for a PropagatePrioProcess configuration parameter value of 2
- to restrict spawned task nice values to that of the slurmd daemon plus 1.
- This ensures that the slurmd daemon always has a higher scheduling
- priority than spawned tasks.
- -- Add support in slurmctld, slurmd and slurmdbd for option of "-n <value>" to
- reset the daemon's nice value.
- -- Fixed slurm_load_slurmd_status and slurm_pid2jobid to work correctly when
- multiple slurmds are in use.
- -- Altered srun to set max_nodes to min_nodes if not set when doing an
- allocation to mimic that which salloc and sbatch do. If running a step if
- the max isn't set it remains unset.
- -- Applied patch from David Egolf (David.Egolf@Bull.com). Added the ability
- to purge/archive accounting data on a day or hour basis, previously
- it was only available on a monthly basis.
- -- Add support for maximum node count in job step request.
- -- Fix bug in CPU count logic for job step allocation (used count of CPUS per
- node rather than CPUs allocated to the job).
- -- Add new configuration parameters GroupUpdateForce and GroupUpdateTime.
- See "man slurm.conf" for details about how these control when slurmctld
- updates its information of which users are in the groups allowed to use
- partitions.
- -- Added sacctmgr list events which will list events that have happened on
- clusters in accounting.
- -- Permit a running job to shrink in size using a command of
- "scontrol update JobId=# NumNodes=#" or
- "scontrol update JobId=# NodeList=<names>". Subsequent job steps must
- explicitly specify an appropriate node count to work properly.
- -- Added resize_time field to job record noting the time of the latest job
- size change (to be used for accounting purposes).
- -- sview/smap now hides hidden partitions and their jobs by default, with an
- option to display them.
- * Changes in SLURM 2.2.0.pre3
- =============================
- -- Refine support for TotalView partial attach. Add parameter to configure
- program of "--enable-partial-attach".
- -- In select/cons_res, the count of CPUs on required nodes was formerly
- ignored in enforcing the maximum CPU limit. Also enforce maximum CPU
- limit when the topology/tree plugin is configured (previously ignored).
- -- In select/cons_res, allocate cores for a job using a best-fit approach.
- -- In select/cons_res, for jobs that can run on a single node, use a best-fit
- packing approach.
- -- Add support for new partition states of DRAIN and INACTIVE and new partition
- option of "Alternate" (alternate partition to use for jobs submitted to
- partitions that are currently in a state of DRAIN or INACTIVE).
- -- Add group membership cache. This can substantially speed up slurmctld
- startup or reconfiguration if many partitions have AllowGroups configured.
- -- Added slurmdb api for accessing slurm DB information.
- -- In select/linear: Modify data structures for better performance and to
- avoid underflow error messages when slurmctld restarts while jobs are
- in completing state.
- -- Added hash for slurm.conf so when nodes check in to the controller it can
- verify the slurm.conf is the same as the one it is running. If not an
- error message is displayed. To silence this message add NO_CONF_HASH
- to DebugFlags in your slurm.conf.
- -- Added error code ESLURM_CIRCULAR_DEPENDENCY and prevent circular job
- dependencies (e.g. job 12 dependent upon job 11 AND job 11 is dependent
- upon job 12).
- -- Add BootTime and SlurmdStartTime to available node information.
- -- Fixed moab_2_slurmdb to work correctly under new database schema.
- -- Slurmd will drain a compute node when the SlurmdSpoolDir is full.
- * Changes in SLURM 2.2.0.pre2
- =============================
- -- Add support for spank_get_item() to get S_STEP_ALLOC_CORES and
- S_STEP_ALLOC_MEM. Support will remain for S_JOB_ALLOC_CORES and
- S_JOB_ALLOC_MEM.
- -- Kill individual job steps that exceed their memory limit rather than
- killing an entire job if one step exceeds its memory limit.
- -- Added configuration parameter VSizeFactor to enforce virtual memory limits
- for jobs and job steps as a percentage of their real memory allocation.
- -- Add scontrol ability to update job step's time limits.
- -- Add scontrol ability to update job's NumCPUs count.
- -- Add --time-min options to salloc, sbatch and srun. The scontrol command
- has been modified to display and modify the new field. sched/backfill
- plugin has been changed to alter time limits of jobs with the
- --time-min option if doing so permits earlier job initiation.
- -- Add support for TotalView symbol MPIR_partial_attach_ok with srun support
- to release processes which TotalView does not attach to.
- -- Add new option for SelectTypeParameters of CR_ONE_TASK_PER_CORE. This
- option will allocate one task per core by default. Without this option,
- by default one task will be allocated per thread on nodes with more than
- one ThreadsPerCore configured.
- -- Avoid accounting separately for a current pid that corresponds to a Light Weight
- Process (Thread POSIX) appearing in the /proc directory. Only account for
- the original process (pid==tgid) to avoid accounting for memory use more
- than once.
- -- Add proctrack/cgroup plugin which uses Linux control groups (aka cgroup)
- to track processes on Linux systems having this feature enabled (kernel
- >= 2.6.24).
- -- Add logging of license transactions including job_id.
- -- Add configuration parameters SlurmSchedLogFile and SlurmSchedLogLevel to
- support writing scheduling events to a separate log file.
- -- Added contribs/web_apps/chart_stats.cgi, a web app that invokes sreport to
- retrieve from the accounting storage db a user's request for job usage or
- machine utilization statistics and charts the results to a browser.
- -- Massive change to the schema in the storage_accounting/mysql plugin. When
- starting the slurmdbd the process of conversion may take a few minutes.
- You might also see some errors such as 'error: mysql_query failed: 1206
- The total number of locks exceeds the lock table size'. If you get this,
- do not worry, it is because your setting of innodb_buffer_pool_size in
- your my.cnf file is not set or set too low. A decent value there should
- be 64M or higher depending on the system you are running on. See
- RELEASE_NOTES for more information. But setting this and then
- restarting the mysqld and slurmdbd will put things right. After this
- change we have noticed 50-75% increase in performance with sreport and
- sacct.
- -- Fix for MaxCPUs to honor partitions of 1 node that have more than the
- maxcpus for a job.
- -- Add support for "scontrol notify <message>" to work for batch jobs.
- * Changes in SLURM 2.2.0.pre1
- =============================
- -- Added RunTime field to scontrol show job report
- -- Added SLURM_VERSION_NUMBER and removed SLURM_API_VERSION from
- slurm/slurm.h.
- -- Added support to handle communication with SLURM 2.1 clusters. Jobs
- should not be lost in the future when upgrading to higher versions of
- SLURM.
- -- Added withdeleted options for listing clusters, users, and accounts
- -- Remove PLPA task affinity functions due to that package being deprecated.
- -- Preserve current partition state information and node Feature and Weight
- information rather than use contents of slurm.conf file after slurmctld
- restart with -R option or SIGHUP. Replace information with contents of
- slurm.conf after slurmctld restart without -R or "scontrol reconfigure".
- See RELEASE_NOTES file for more details.
- -- Modify SLURM's PMI library (for MPICH2) to properly execute an executable
- program stand-alone (single MPI task launched without srun).
- -- Made GrpCPUs and MaxCPUs limits work for select/cons_res.
- -- Moved all SQL dependent plugins into a separate rpm slurm-sql. This
- should be needed only where a connection to a database is needed (i.e.
- where the slurmdbd is running)
- -- Add command line option "no_sys_info" to PAM module to suppress system
- logging of "access granted for user ...". Access denied and other errors
- will still be logged.
- -- sinfo -R now has the user and timestamp in separate fields from the reason.
- -- Much functionality has been added to account_storage/pgsql. The plugin
- is still in a very beta state. It is still highly advised to use the
- mysql plugin, but if you feel like living on the edge or just really
- like postgres over mysql for some reason here you go. (Work done
- primarily by Hongjia Cao, NUDT.)
- * Changes in SLURM 2.1.17
- =========================
- -- Correct format of --begin reported in salloc, sbatch and srun --help
- message.
- -- Correct logic for regular users to increase nice value of their own jobs.
- * Changes in SLURM 2.1.16
- =========================
- -- Fixed minor warnings from gcc-4.5
- -- Fixed initialization of accounting_storage_enforce in the slurmctld.
- -- Fixed bug where if GrpNodes was lowered while pending jobs existed and were
- above the limit the slurmctld would seg fault.
- -- Fixed minor memory leak when unpack error happens on an
- association_shares_object_t.
- -- Set Lft and Rgt correctly when adding association. Fix for regression
- caused in 2.1.15, cosmetic fix only.
- -- Replaced optarg which was undefined in some spots to make sure ENV vars are
- set up correctly.
- -- When removing an account from a cluster with sacctmgr you no longer get
- a list of previously deleted associations.
- -- Fix to make jobcomp/(pg/my)sql correctly work when the database name is
- different than the default.
- * Changes in SLURM 2.1.15
- =========================
- -- Fix bug in which backup slurmctld can purge job scripts (and kill batch
- jobs) when it assumes primary control, particularly when this happens
- multiple times in a short time interval.
- -- In sched/wiki and sched/wiki2 add IWD (Initial Working Directory) to the
- information reported about jobs.
- -- Fix bug in calculating a daily or weekly reservation start time when the
- reservation is updated. Patch from Per Lundqvist (National Supercomputer
- Centre, Linköping University, Sweden).
- -- Fix bug in how job step memory limits are calculated when the --relative
- option is used.
- -- Restore operation of srun -X option to forward SIGINT to spawned tasks
- without killing them.
- -- Fixed a bug in calculating the root account's raw usage reported by Par
- Andersson
- -- Fixed a bug in sshare displaying account hierarchy reported by Per
- Lundqvist.
- -- In select/linear plugin, when a node allocated to a running job is removed
- from a partition, only log the event once. Fixes problem reported by Per
- Lundqvist.
- * Changes in SLURM 2.1.14
- =========================
- -- Fixed coding mistakes in _slurm_rpc_resv_show() and job_alloc_info() found
- while reviewing the code.
- -- Fix select/cons_res logic to prevent allocating resources while jobs
- previously allocated resources on the node are still completing.
- -- Fixed typo in job_mgr.c dealing with qos instead of associations.
- -- Make sure associations and qos' are initiated when added.
- -- Fixed wrong initialization for wckeys in the association manager.
- -- Added wiki.conf configuration parameter of HidePartitionNodes. See
- "man wiki.conf" for more information.
- -- Add "JobAggregationTime=#" field SchedulerParameter configuration parameter
- output.
- -- Modify init.d/slurm and slurmdbd scripts to prevent the possible
- inadvertent inclusion of "." in LD_LIBRARY_PATH environment variable.
- To fail, the script would need to be executed by user root or SlurmUser
- without the LD_LIBRARY_PATH environment variable set and there would
- have to be a maliciously altered library in the working directory.
- Thanks to Raphael Geissert for identifying the problem. This addresses
- security vulnerability CVE-2010-3380.
- * Changes in SLURM 2.1.13
- =========================
- -- Fix race condition which can set a node state to IDLE on slurmctld startup
- even if it has running jobs.
- * Changes in SLURM 2.1.12
- =========================
- -- Fixes for building on OS X 10.5.
- -- Fixed a few '-' without a '\' in front of them in the man pages.
- -- Fixed issues in client tools where a requeued job did not get displayed
- correctly.
- -- Update typos in doc/html/accounting.shtml doc/html/resource_limits.shtml
- doc/man/man5/slurmdbd.conf.5 and doc/man/man5/slurm.conf.5
- -- Fixed a bug in exitcode:signal display in sacct
- -- Fix bug when request comes in for consumable resources and the -c option
- is used in conjunction with -O
- -- Fixed squeue -o "%h" output formatting
- -- Change select/linear message "error: job xxx: best_fit topology failure"
- to debug type.
- -- BLUEGENE - Fix for sinfo -R to group all midplanes together in a single
- line for midplanes in an error state instead of 1 line for each midplane.
- -- Fix srun to work correctly with --uid when getting an allocation
- and creating a step, also fix salloc to assume identity at the correct
- time as well.
- -- BLUEGENE - Fixed issue with jobs being refused when running dynamic mode
- and every job on the system happens to be the same size.
- -- Removed bad #define _SLURMD_H from slurmd/get_mach_stat.h. Didn't appear
- to cause any problems being there, just incorrect syntax.
- -- Validate the job ID when salloc or srun receive an SRUN_JOB_COMPLETE RPC to
- avoid killing the wrong job if the original command exits and the port gets
- re-used by another command right away.
- -- Fix to put node in correct state in accounting when updating it to drain from
- scontrol/sview.
- -- BLUEGENE - Removed incorrect unlocking on error cases when starting jobs.
- -- Improve logging of invalid sinfo and squeue print options.
- -- BLUEGENE - Added check to libsched_if to allow root to run even outside of
- SLURM. This is needed when running certain blocks outside of SLURM in HTC
- mode.
- * Changes in SLURM 2.1.11-2
- ===========================
- -- BLUEGENE - make it so libsched_if.so is named correctly on 'L' it is
- libsched_if64.so and on 'P' it is libsched_if.so
- * Changes in SLURM 2.1.11
- =========================
- -- BLUEGENE - fix sinfo to not get duplicate entries when running command
- sinfo -e -o "%9P %6m %.4c %.22F %f"
- -- Fix bug that caused segv when deleting a partition with pending jobs.
- -- Better error message for when trying to modify an account's name with
- sacctmgr.
- -- Added back removal of #include "src/common/slurm_xlator.h" from
- select/cons_res.
- -- Fix incorrect logic in global_accounting in regression tests for
- setting QOS.
- -- BLUEGENE - Fixed issue where removing a small block in dynamic mode,
- and other blocks also in that midplane needed to be removed and were in
- an error state. They all weren't removed correctly in accounting.
- -- Prevent scontrol segv with "scontrol show node <name>" command with nodes
- in a hidden partition.
- -- Fixed sizing of popup grids in sview.
- -- Fixed sacct when querying against a jobid the start time is not set.
- -- Fix configure to get correct version of pkg-config if both 32bit and 64bit
- libs are installed.
- -- Fix issue with sshare not sorting correctly the tree of associations.
- -- Update documentation for sreport.
- -- BLUEGENE - fix regression in 2.1.10 on assigning multiple jobs to one block.
- -- Minor memory leak fixed when killing job error happens.
- -- Fix sacctmgr list assoc when talking to a 2.2 slurmdbd.
- * Changes in SLURM 2.1.10
- =========================
- -- Fix memory leak in sched/builtin plugin.
- -- Fixed sbatch to work correctly when no nodes are specified, but
- --ntasks-per-node is.
- -- Make sure account and wckey for a job are lower case before inserting into
- accounting.
- -- Added note to squeue documentation about --jobs option displaying jobs
- even if they are on hidden partitions.
- -- Fix srun to work correctly with --uid when getting an allocation
- and creating a step.
- -- Fix for when removing a limit from a users association inside the
- fairshare tree the parents limit is now inherited automatically in
- the slurmctld. Previously the slurmctld would have to be restarted.
- This problem only exists when setting a users association limit to -1.
- -- Patch from Matthieu Hautreux (CEA) dealing with possible overflows that
- could come up with the select/cons_res plugin with uint32_t's being treated
- as uint16_t's.
- -- Correct logic for creating a reservation with a Duration=Infinite (used to
- set reservation end time in the past).
- -- Correct logic for creating a reservation that properly handles the OVERLAP
- and IGNORE_JOBS flags (flags were ignored under some conditions).
- -- Fixed a fair-share calculation bug in the priority/multifactor plugin.
- -- Make sure a user entry in the database that was previously deleted is
- restored clean when added back, i.e. remove admin privileges previously
- given.
- -- BLUEGENE - Future start time is set correctly when eligible time for a job
- is in the future, but the job can physically run earlier.
- -- Updated Documentation for sacctmgr for Wall and CPUMin options stating when
- the limit is reached running jobs will be killed.
- -- Fix deadlock issue in the slurmctld when lowering limits in accounting to
- lower than that of pending jobs.
- -- Fix bug in salloc, sbatch and srun that could under some conditions process
- the --threads-per-core, --cores-per-socket and --sockets-per-node options
- improperly.
- -- Fix bug in select/cons_res with memory management plus job preemption with
- job removal (e.g. requeue) which under some conditions failed to preempt
- jobs.
- -- Fix deadlock potential when using qos and associations in the slurmctld.
- -- Update documentation to state --ntasks-per-* is for a maximum value
- instead of an absolute.
- -- Get ReturnToService=2 working for front-end configurations (e.g. Cray or
- BlueGene).
- -- Do not make a non-responding node available for use after running
- "scontrol update nodename=<name> state=resume". Wait for node to respond
- before use.
- -- Added slurm_xlator.h to jobacct_gather plugins so they resolve symbols
- correctly when linking to the slurm api.
- -- You can now update a jobs QOS from scontrol. Previously you could only do
- this from sview.
- -- BLUEGENE - Fixed bug where if running in non-dynamic mode sometimes the
- start time returned for a job when using test-only would not be correct.
- * Changes in SLURM 2.1.9
- ========================
- -- In select/linear - Fix logic to prevent over-subscribing memory with shared
- nodes (Shared=YES or Shared=FORCE).
- -- Fix for handling -N and --ntasks-per-node without specifying -n with
- salloc and sbatch.
- -- Fix jobacct_gather/linux if not polling on tasks to give tasks time to
- start before doing initial gather.
- -- When changing priority with the multifactor plugin we make sure we update
- the last_job_update variable.
- -- Fixed sview for gtk < 2.10 to display correct debug level at first.
- -- Fixed sview to not select too fast when using a mouse right click.
- -- Fixed sacct to display correct timelimits for jobs from accounting.
- -- Fixed sacct when running as root by default query all users as documented.
- -- In proctrack/linuxproc, skip over files in /proc that are not really user
- processes (e.g. "/proc/bus").
- -- Fix documentation bug for slurmdbd.conf
- -- Fix slurmctld to update qos preempt list without restart.
- -- Fix bug in select/cons_res that in some cases would prevent a preempting job
- from using of resources already allocated to a preemptable running job.
- -- Fix for sreport in interactive mode to honor parsable/2 options.
- -- Fixed minor bugs in sacct and sstat commands
- -- BLUEGENE - Fixed issue if the slurmd becomes unresponsive and you have
- blocks in an error state accounting is correct when the slurmd comes
- back up.
- -- Corrected documentation for -n option in srun/salloc/sbatch
- -- BLUEGENE - when running a willrun test along with preemption the bluegene
- plugin now does the correct thing.
- -- Fix possible memory corruption issue which can cause slurmctld to abort.
- -- BLUEGENE - fixed small memory leak when setting up env.
- -- Fixed deadlock if using accounting and cluster changes size in the
- database. This can happen if you mistakenly have multiple primary
- slurmctld's running for a single cluster, which should rarely if ever
- happen.
- -- Fixed sacct -c option.
- -- Critical bug fix in sched/backfill plugin that caused memory corruption.
- * Changes in SLURM 2.1.8
- ========================
- -- Update BUILD_NOTES for AIX and bgp systems on how to get sview to
- build correctly.
- -- Update man page for scontrol when nodes are in the "MIXED" state.
- -- Better error messages for sacctmgr.
- -- Fix bug in allocation of CPUs with select/cons_res and --cpus-per-task
- option.
- -- Fix bug in dependency support for afterok and afternotok options to insure
- that the job's exit status gets checked for dependent jobs prior to purging
- completed job records.
- -- Fix bug in sched/backfill that could set an incorrect expected start time
- for a job.
- -- BLUEGENE - Fix for systems that have midplanes defined in the database
- that don't exist.
- -- Accounting, fixed bug where if removing an object a rollback wasn't
- possible.
- -- Fix possible scontrol stack corruption when listing jobs with a very long
- job or working directory name (over 511 characters).
- -- Insure that SPANK environment variables set by salloc or sbatch get
- propagated to the Prolog on all nodes by setting SLURM_SPANK_* environment
- variables for srun's use.
- -- In sched/wiki2 - Add support for the MODIFYJOB command to alter a job's
- comment field
- -- When a cluster first registers with the SlurmDBD only send nodes in an
- a non-usable state. Before all nodes were sent.
- -- Alter sacct to be able to query jobs by association id.
- -- Edit documentation for scontrol stating ExitCode as something not alterable.
- -- Update documentation about ReturnToService and silently rebooting nodes.
- -- When combining --ntasks-per-node and --exclusive in an allocation request
- the correct thing, giving the allocation the entire node but only
- ntasks-per-node, happens.
- -- Fix accounting transaction logs when deleting associations to put the
- ids instead of the lfts which could change over time.
- -- Fix support for salloc, sbatch and srun's --hint option to avoid allocating
- a job more sockets per node or more cores per socket than desired. Also
- when --hint=compute_bound or --hint=memory_bound then avoid allocating more
- than one task per hyperthread (a change in behavior, but almost certainly
- a preferable mode of operation).
- * Changes in SLURM 2.1.7
- ========================
- -- Modify srun, salloc and sbatch parsing for the --signal option to accept
- a signal name in addition to the previously supported signal
- numbers (e.g. "--signal=USR2@200").
- -- BLUEGENE - Fixed sinfo --long --Node output for cpus on a single cnode.
- -- In sched/wiki2 - Fix another logic bug in support of Moab being able to
- identify preemptable jobs.
- -- In sched/wiki2 - For BlueGene systems only: Fix bug preventing Moab from
- being able to correctly change the node count of pending jobs.
- -- In select/cons_res - Fix bug preventing job preemption with a configuration
- of Shared=FORCE:1 and PreemptMode=GANG,SUSPEND.
- -- In the TaskProlog, add support for an "unset" option to clear environment
- variables for the user application. Also add support for embedded white-
- space in the environment variables exported to the user application
- (everything after the equal sign to the end of the line is included without
- alteration).
- -- Do not install /etc/init.d/slurm or /etc/init.d/slurmdbd on AIX systems.
- -- BLUEGENE - fixed check for small blocks if a node card of a midplane is
- in an error state other jobs can still run on the midplane on other
- nodecards.
- -- BLUEGENE - Check to make sure job killing is in the active job table in
- DB2 when killing the job.
- -- Correct logic to support ResvOverRun configuration parameter.
- -- Get --acctg-freq option working for srun and salloc commands.
- -- Fix sinfo display of drained nodes correctly with the summarize flag.
- -- Fix minor memory leaks in slurmd and slurmstepd.
- -- Better error messages for failed step launch.
- -- Modify srun to insure compatibility of the --relative option with the node
- count requested.
- * Changes in SLURM 2.1.6-2
- ==========================
- -- In sched/wiki2 - Fix logic in support of Moab being able to identify
- preemptable jobs.
- -- Applied fixes to a debug4 message in priority_multifactor.c sent in by
- Per Lundqvist
- -- BLUEGENE - Fixed issue where incorrect nodecards could be picked when
- looking at combining small blocks to make a larger small block.
- * Changes in SLURM 2.1.6
- ========================
- -- For newly submitted jobs, report expected start time in squeue --start as
- "N/A" rather than current time.
- -- Correct sched/backfill logic so that it runs in a more timely fashion.
- -- Fixed issue if running on accounting cache and priority/multifactor to
- initialize the root association when the database comes back up.
- -- Emulated BLUEGENE - fixed issue where blocks weren't always created
- correctly when loading from state. This does not apply to a real
- bluegene system, only emulated.
- -- Fixed bug when job is completing and its cpu_cnt would be calculated
- incorrectly, possibly resulting in an underflow being logged.
- -- Fixed bug where if there are pending jobs in a partition which was
- updated to have no nodes in it the slurmctld would dump core.
- -- Fixed smap and sview to display partitions with no nodes in them.
- -- Improve configure script's logic to detect LUA libraries.
- -- Fix bug that could cause slurmctld to abort if select/cons_res is used AND a
- job is submitted using the --no-kill option AND one of the job's nodes goes
- DOWN AND slurmctld restarts while that job is still running.
- -- In jobcomp plugins, job time limit was sometimes recorded improperly if not
- set by user (recorded huge number rather than partition's time limit).
- * Changes in SLURM 2.1.5
- ========================
- -- BLUEGENE - Fixed display of draining nodes for sinfo -R.
- -- Fixes to scontrol and sview when setting a job to an impossible start time.
- -- Added -h to srun for help.
- -- Fix for sacctmgr man page to remove erroneous 'with' statements.
- -- Fix for unpacking jobs with accounting statistics, previously it appears
- only steps were unpacked correctly, for the most part sacct would only
- display this information making this fix a very minor one.
- -- Changed scontrol and sview output for jobs with unknown end times from
- 'NONE' to 'Unknown'.
- -- Fixed mysql plugin to reset classification when adding a
- previously deleted cluster.
- -- Permit a batch script to reset umask and have that propagate to tasks
- spawned by subsequent srun. Previously the umask in effect when sbatch was
- executed was propagated to tasks spawned by srun.
- -- Modify slurm_job_cpus_allocated_on_node_id() and
- slurm_job_cpus_allocated_on_node() functions to not write explanation of
- failures to stderr. Only return -1 and set errno.
- -- Correction in configurator.html script. Prolog and Epilog were reversed.
- -- BLUEGENE - Fixed race condition where if a nodecard has an error on an
- un-booted block when a job comes to use it before the state checking
- thread notices it which could cause the slurmctld to lock up on a
- non-dynamic system.
- -- In select/cons_res with FastSchedule=0 and Procs=# defined for the node,
- but no specific socket/core/thread count configured, avoid fatal error if
- the number of cores on a node is less than the number of Procs configured.
- -- Added ability for the perlapi to utilize opaque data types returned from
- the C api.
- -- BLUEGENE - made the perlapi get correct values for cpus per node,
- Previously it would give the number of cpus per cnode instead of midplane.
- -- BLUEGENE - Fixed issue where if a block being selected for a job to use
- and during the process a hardware failure happens, previously the block
- would still be allowed to be used which would fail or requeue the job
- depending on the configuration.
- -- For SPANK job environment, avoid duplicate "SPANK_" prefix for environment
- set by sbatch jobs.
- -- Make squeue select jobs on hidden partitions when requesting more than one.
- -- Avoid automatically cancelling job steps when all of the tasks on some node
- have gracefully terminated.
- * Changes in SLURM 2.1.4
- ========================
- -- Fix for purge script in accounting to use correct options.
- -- If SelectType=select/linear and SelectTypeParameters=CR_Memory fix bug that
- would fail to release memory reserved for a job if "scontrol reconfigure"
- is executed while the job is in completing state.
- -- Fix bug in handling event trigger for job time limit while job is still
- in pending state.
- -- Fixed display of Ave/MaxCPU in sacct for jobs. Steps were printed
- correctly.
- -- When node current features differs from slurm.conf, log the node names
- using a hostlist expression rather than listing individual node names.
- -- Improve ability of srun to abort job step for some task launch failures.
- -- Fix mvapich plugin logic to release the created job allocation on
- initialization failure (previously the failures would cancel job step,
- but retain job allocation).
- -- Fix bug in srun for task count so large that it overflows int data type.
- -- Fix important bug in select/cons_res handling of ntasks-per-core parameter
- that was uncovered by a bug fixed in v2.1.3. Bug produced fatal error for
- slurmctld: "cons_res: cpus computation error".
- -- Fix bug in select/cons_res handling of partitions configured with
- Shared=YES. Prior logic failed to support running multiple jobs per node.
- * Changes in SLURM 2.1.3-2
- ==========================
- -- Modified spec file to obsolete pam_slurm when installing
- the slurm-pam_slurm rpm.
- * Changes in SLURM 2.1.3-1
- ==========================
- -- BLUEGENE - Fix issues on static/overlap systems where if a midplane
- was drained you would not be able to create new blocks on it.
- -- In sched/wiki2 (for Moab): Add excluded host list to job information
- using new keyword "EXCLUDE_HOSTLIST".
- -- Correct slurmd reporting of incorrect socket/core/thread counts.
- -- For sched/wiki2 (Moab): Do not extend a job's end time for suspend/resume
- or startup delay due to node boot time. A job's end time will always be
- its start time plus time limit.
- -- Added build-time option (to configure program) of --with-pam_dir to
- specify the directory into which PAM modules get installed, although it
- should pick the proper directory by default. "make install" and "rpmbuild"
- should now put the pam_slurm.so file in the proper directory.
- -- Modify PAM module to link against SLURM API shared library and use exported
- slurm_hostlist functions.
- -- Do not block new jobs with --immediate option while another job is in the
- process of being requeued (which can take a long time for some node failure
- modes).
- -- For topology/tree, log invalid hostnames in a single hostlist expression
- rather than one per line.
- -- A job step's default time limit will be UNLIMITED rather than partition's
- default time limit. The step will automatically be cancelled as part of the
- job termination logic when the job's time limit is reached.
- -- sacct - fixed bug when checking jobs against a reservation
- -- In select/cons_res, fix support for job allocation with --ntasks_per_node
- option. Previously could allocate too few CPUs on some nodes.
- -- Adjustment made to init message to the slurmdbd to allow backwards
- compatibility with future 2.2 release. YOU NEED TO UPGRADE SLURMDBD
- BEFORE ANYTHING ELSE.
- -- Fix accounting when comment of down/drained node has double quotes in it.
- * Changes in SLURM 2.1.2
- ========================
- -- Added nodelist to sview for jobs on non-bluegene systems
- -- Correction in value of batch job environment variable SLURM_TASKS_PER_NODE
- under some conditions.
- -- When a node silently fails which is already drained/down the reason
- for draining for the node is not changed.
- -- Srun will ignore SLURM_NNODES environment variable and use the count of
- currently allocated nodes if that count changes during the job's lifetime
- (e.g. job allocation uses the --no-kill option and a node goes DOWN, job
- step would previously always fail).
- -- Made it so sacctmgr can't add blank user or account. The MySQL plugin
- will also reject such requests.
- -- Revert libpmi.so version for compatibility with SLURM version 2.0 and
- earlier to avoid forcing applications using a specific libpmi.so version to
- rebuild unnecessarily (revert from libpmi.so.21.0.0 to libpmi.so.0.0.0).
- -- Restore support for a pending job's constraints (required node features)
- when slurmctld is restarted (internal structure needed to be rebuilt).
- -- Removed checkpoint_blcr.so from the plugin rpm in the slurm.spec since
- it is also in the blcr rpm.
- -- Fixed issue in sview where you were unable to edit the count
- of jobs to share resources.
- -- BLUEGENE - Fixed issue where tasks on steps weren't being displayed
- correctly with scontrol and sview.
- -- BLUEGENE - fixed wiki2 plugin to report correct task count for pending
- jobs.
- -- BLUEGENE - Added /etc/ld.so.conf.d/slurm.conf to point to the
- directory holding libsched_if64.so when building rpms.
- -- Adjust get_wckeys call in slurmdbd to allow operators to list wckeys.
- * Changes in SLURM 2.1.1
- ========================
- -- Fix for case sensitive databases when a slurmctld has a mixed case
- clustername to lower case the string to ease compares.
- -- Fix squeue if job is completing and failed to print remaining
- nodes instead of failed message.
- -- Fix sview core when searching for partitions by state.
- -- Fixed setting the start time when querying in sacct to the
- beginning of the day if not set previously.
- -- Defined slurm_free_reservation_info_msg and slurm_free_topo_info_msg
- in common/slurm_protocol_defs.h
- -- Avoid generating error when a job step includes a memory specification and
- memory is not configured as a consumable resource.
- -- Patch for small memory leak in src/common/plugstack.c
- -- Fix sview search on node state.
- -- Fix bug in which improperly formed job dependency specification can cause
- slurmctld to abort.
- -- Fixed issue where slurmctld wouldn't always get a message to send cluster
- information when registering for the first time with the slurmdbd.
- -- Add slurm_*_trigger.3 man pages for event trigger APIs.
- -- Fix bug in job preemption logic that would free allocated memory twice.
- -- Fix spelling issues (from Gennaro Oliva)
- -- Fix issue when changing parents of an account in accounting all children
- weren't always sent to their respective slurmctlds until a restart.
- -- Restore support for srun/salloc/sbatch option --hint=nomultithread to
- bind tasks to cores rather than threads (broken in slurm v2.1.0-pre5).
- -- Fix issue where a 2.0 sacct could not talk correctly to a 2.1 slurmdbd.
- -- BLUEGENE - Fix issue where no partitions have any nodes assigned them to
- alert user no blocks can be created.
- -- BLUEGENE - Fix smap to put BGP images when using -Dc on a Blue Gene/P
- system.
- -- Set SLURM_SUBMIT_DIR environment variable for srun and salloc commands to
- match behavior of sbatch command.
- -- Report WorkDir from "scontrol show job" command for jobs launched using
- salloc and srun.
- -- Update correctly the wckey when changing it on a pending job.
- -- Set wckeyid correctly in accounting when cancelling a pending job.
- -- BLUEGENE - critical fix where jobs would be killed incorrectly.
- -- BLUEGENE - fix for sview putting multiple ionodes on to nodelists when
- viewing the jobs tab.
- * Changes in SLURM 2.1.0
- ========================
- -- Improve sview layout of blocks in use.
- -- A user can now change the dimensions of the grid in sview.
- -- BLUEGENE - improved startup speed further for large numbers of defined
- blocks
- -- Fix to _get_job_min_nodes() in wiki2/get_jobs.c suggested by Michal Novotny
- -- BLUEGENE - fixed issues when updating a pending job when a node
- count was incorrect for the asked for connection type.
- -- BLUEGENE - fixed issue when combining blocks that are in ready states to
- make a larger block from those or make multiple smaller blocks by
- splitting the larger block. Previously this would only work with block
- in a free state.
- -- Fix bug in wiki(2) plugins where if HostFormat=2 and the task list is
- greater than 64 we don't truncate. Previously this would mess up Moab
- by sending a truncated task list when doing a get jobs.
- -- Added update slurmctld debug level to sview when in admin mode.
- -- Added logic to make sure if enforcing a memory limit when using the
- jobacct_gather plugin a user can no longer turn off the logic to enforce
- the limit.
- -- Replaced many calls to getpwuid() with reentrant uid_to_string()
- -- The slurmstepd will now refresh its log file handle on a reconfig,
- previously if a log was rolled any output from the stepd was lost.
|