<?xml version='1.0' encoding='UTF-8'?><?xml-stylesheet href="http://www.blogger.com/styles/atom.css" type="text/css"?><feed xmlns='http://www.w3.org/2005/Atom' xmlns:openSearch='http://a9.com/-/spec/opensearchrss/1.0/'><id>tag:blogger.com,1999:blog-7346520062335584992.post6536862248220855389..comments</id><updated>2011-03-14T12:42:13.399+13:00</updated><category term='public sector information'/><category term='NUC'/><category term='DDI'/><category term='digital new zealand'/><category term='Libraries and  Archives Canada'/><category term='social design'/><category term='link text'/><category term='books'/><category term='collaboration'/><category term='archives new zealand'/><category term='development'/><category term='strategy'/><category term='Papers Past'/><category term='digital future'/><category term='Google Book Search'/><category term='privacy'/><category term='heritage'/><category term='technology in libraries'/><category term='analytics'/><category term='book digitisation'/><category term='ndf2008'/><category term='Elliott Young'/><category term='digital curation'/><category term='academic libraries'/><category term='Deep Packet Inspection'/><category term='web trends'/><category term='NDHA'/><category term='social bookmarking'/><category term='Mike Kmiec'/><category term='image retrieval'/><category term='library collections'/><category term='collection management'/><category term='social networking sites'/><category term='digital products'/><category term='library consortia'/><category term='wikimedia'/><category term='electronic news delivery'/><category term='Project Gutenberg'/><category term='learning commons'/><category term='open access'/><category term='myspace'/><category term='virtual worlds'/><category term='mashup'/><category term='indigenous education'/><category term='federated searching'/><category term='real time'/><category term='EYC'/><category term='reading'/><category term='digital repositories'/><category term='PDF'/><category term='ethernet standard'/><category term='digital scholarship'/><category term='JPEG'/><category term='integrated library system platforms'/><category term='digital libraries'/><category term='information'/><category term='UX'/><category term='mobile internet'/><category term='creative industries'/><category term='social web'/><category term='numeracy'/><category term='literacy'/><category term='creative economy'/><category term='digital technologies'/><category term='ITIL'/><category term='WSIS'/><category term='iPhone'/><category term='school libraries'/><category term='web user trends'/><category term='internet safety'/><category term='flickr'/><category term='Simon'/><category term='innovation'/><category term='web writing'/><category term='TheSourceNLNZ'/><category term='OSMOSIS'/><category term='McLean Papers'/><category term='marketing'/><category term='design'/><category term='governance'/><category term='net neutrality'/><category term='blogging'/><category term='remote workers'/><category term='reference librarians'/><category term='FRBR'/><category term='google'/><category term='digital music'/><category term='digital heritage'/><category term='electronic publishing'/><category term='education'/><category term='technology'/><category term='information architecture'/><category term='innovation systems'/><category term='social software'/><category term='linked data'/><category term='greenstone'/><category term='teachers and teaching'/><category term='National Library of Wales'/><category term='Generation Y'/><category term='digital identity management'/><category term='Europeana'/><category term='digital strategies'/><category term='creative commons'/><category term='Library 2.0'/><category term='JPEG 2000'/><category term='human resources'/><category term='search engine optimisation'/><category term='scholarly publishing'/><category term='PREMIS'/><category term='librarians'/><category term='economic value of libraries'/><category term='NDF2007'/><category term='Manuscripts + Pictorial'/><category term='survey'/><category term='user interface'/><category term='library clients'/><category term='Omeka'/><category term='digital cameras'/><category term='the commons'/><category term='access'/><category term='podcasts'/><category term='Freedom of Information'/><category term='Memento Project'/><category term='photosynth'/><category term='open standards'/><category term='e-learning'/><category term='bibliographic records'/><category term='mw2008'/><category term='LIANZA'/><category term='supercomputers'/><category term='Facebook'/><category term='India'/><category term='usability'/><category term='database licensing'/><category term='Presidential libraries'/><category term='web harvest'/><category term='hardcopy books'/><category term='personal data protection'/><category term='del.icio.us'/><category term='knowledge'/><category term='navigation'/><category term='arts'/><category term='ebooks'/><category term='music libraries'/><category term='film preservation'/><category term='cloud computing'/><category term='scientific publishing'/><category term='Harvard University'/><category term='web archiving'/><category term='&quot;Internet of Things&quot;'/><category term='electronic journals'/><category term='ndf2009'/><category term='webstock08'/><category term='QR codes'/><category term='scholarship'/><category term='data preservation'/><category term='e-books'/><category term='authorities'/><category term='internet governance'/><category term='Douglas Campbell'/><category term='metadata harvesters'/><category term='mash-ups'/><category term='digital economy'/><category term='libraries'/><category term='networks'/><category term='library automation'/><category term='publishing'/><category term='World Digital Library'/><category term='public media'/><category term='Video-in-print'/><category term='wikipedia'/><category term='derivatives'/><category term='copyright'/><category term='learning institutions'/><category term='library technologies'/><category term='Children'/><category term='social networking tools'/><category term='digital publishing'/><category term='cultural institutions'/><category term='twitter'/><category term='economic impact'/><category term='visual resources'/><category term='learning spaces'/><category term='information technology'/><category term='competencies'/><category term='standards'/><category term='project management'/><category term='National Library of Scotland'/><category term='knowledge economy'/><category term='social media'/><category term='metadata schemas'/><category term='innovation policies'/><category term='digital books'/><category term='web platforms'/><category term='high-speed broadband'/><category term='Science Commons'/><category term='e-strategies'/><category term='g'/><category term='data transfer'/><category term='internet behaviour'/><category term='metasearch'/><category term='datasets'/><category term='institutional repositories'/><category term='digital access'/><category term='lianza09'/><category term='young adults'/><category term='European research libraries'/><category term='Matapihi'/><category term='rights'/><category term='digital divide'/><category term='cultural funding'/><category term='re-use'/><category term='internet access'/><category term='Distributed Work Environments (DWEs)'/><category term='open source'/><category term='special collections'/><category term='virtual reference'/><category term='access to technology'/><category term='German Digital Library'/><category term='Andy Neale'/><category term='preservation'/><category term='digital literacy'/><category term='global research'/><category term='information literacy'/><category term='library services'/><category term='accessibility'/><category term='eLinks'/><category term='Virginia Gow'/><category term='new media'/><category term='society'/><category term='nzetc'/><category term='web 2.0'/><category term='Google Image Labeler'/><category term='research libraries'/><category term='Web 3.0'/><category term='OCLC'/><category term='indigenous knowledge'/><category term='interface technologies'/><category term='research data'/><category term='LIAC'/><category term='libraries and access'/><category term='geotagging'/><category term='outages'/><category term='future of print'/><category term='robots.txt'/><category term='semantic web'/><category term='Association of Research Libraries'/><category term='closed captions'/><category term='XML'/><category term='Generation X'/><category term='web standards'/><category term='digital audio'/><category term='Ex Libris'/><category term='photo collections'/><category term='digital information'/><category term='E-readers'/><category term='technology trends'/><category term='collaborative spaces'/><category term='internet censorship'/><category term='web security'/><category term='information resources'/><category term='electronic resources'/><category term='colour printing'/><category term='mobile technologies'/><category term='communication of memory'/><category term='Virtualisation'/><category term='persistent identifiers'/><category term='art preservation'/><category term='people'/><category term='catalogues'/><category term='user-centered design'/><category term='scanning'/><category term='public libraries'/><category term='digital forensics'/><category term='koninklijke biblioteek'/><category term='government libraries'/><category term='web sites'/><category term='scholarly communication'/><category term='crowdsourcing'/><category term='dublin core'/><category term='dc2007'/><category term='national digital heritage archive'/><category term='research collections'/><category term='Change Managment'/><category term='orphan works'/><category term='electronic paper'/><category term='web design'/><category term='open society'/><category term='public library programmes'/><category term='e-journals'/><category term='Google Maps'/><category term='media'/><category term='Kindle'/><category term='multimedia technologies'/><category term='M-Libraries'/><category term='optical character recognition'/><category term='personal digital archives'/><category term='search engines'/><category term='cultural philanthropy'/><category term='reputation'/><category term='information/knowledge'/><category term='open data'/><category term='cultural heritage'/><category term='web development'/><category term='Chelsea Hughes'/><category term='social inclusion'/><category term='online catalogues'/><category term='National Archives UK'/><category term='academic publishing'/><category term='Configuration Management'/><category term='GLAMS'/><category term='digital preservation'/><category term='Archives'/><category term='parallel imports'/><category term='e-government'/><category term='nz on screen'/><category term='social networking'/><category term='metrics'/><category term='e-govt guidelines'/><category term='Quick Response Codes'/><category term='European Library'/><category term='spoken word collections'/><category term='internet'/><category term='electronic literature'/><category term='digitisation'/><category term='information society'/><category term='Gordon Paynter'/><category term='LIANZA07'/><category term='social objects'/><category term='digital media'/><category term='rfid'/><category term='Courtney Johnston'/><category term='web resources'/><category term='eResearch'/><category term='libraries and value'/><category term='digital collections'/><category term='research'/><category term='ICTs'/><category term='Primo'/><category term='Find'/><category term='community democracy'/><category term='academic journals'/><category term='literary publishing'/><category term='library vending machines'/><category term='children&apos;s library services'/><category term='culture'/><category term='broadband'/><category term='librarianship'/><category term='recession - impact'/><category term='universities'/><category term='engage your community'/><category term='community management'/><category term='YouTube'/><category term='mpnatlib'/><category term='sustainable economics'/><category term='museums'/><category term='distance education'/><category term='e-publishing'/><category term='electronic records management'/><category term='newspapers'/><category term='British Library'/><category term='ILS'/><category term='digital content strategy'/><category term='wisdom'/><category term='cultural economy'/><category term='OpenAIRE'/><category term='urbanisation'/><category term='semantic publsihing'/><category term='Aditya (Eddie) Anand'/><category term='intellectual property'/><category term='digital melting pot'/><category term='Sarah Jane Barnett'/><category term='tagging'/><category term='maps'/><category term='publishers'/><category term='data'/><category term='digital natives'/><category term='library software'/><category term='metadata'/><category term='discovery'/><title type='text'>Comments on LibraryTechNZ: 2008 Web Harvest - Let us know how we can make it ...</title><link rel='http://schemas.google.com/g/2005#feed' type='application/atom+xml' href='http://librarytechnz.natlib.govt.nz/feeds/6536862248220855389/comments/default'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7346520062335584992/6536862248220855389/comments/default'/><link rel='alternate' type='text/html' href='http://librarytechnz.natlib.govt.nz/2008/10/2008-web-harvest-let-us-know-how-we-can.html'/><author><name>National Library of New Zealand</name><uri>http://www.blogger.com/profile/05067703181520460430</uri><email>noreply@blogger.com</email><gd:image xmlns:gd='http://schemas.google.com/g/2005' rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><generator version='7.00' uri='http://www.blogger.com'>Blogger</generator><openSearch:totalResults>23</openSearch:totalResults><openSearch:startIndex>1</openSearch:startIndex><openSearch:itemsPerPage>25</openSearch:itemsPerPage><entry><id>tag:blogger.com,1999:blog-7346520062335584992.post-7141452066490174544</id><published>2008-10-21T14:15:00.000+13:00</published><updated>2008-10-21T14:15:00.000+13:00</updated><title type='text'>Hi Andrew:&lt;br&gt;&lt;br&gt;We've posted a response to a cou...</title><content type='html'>Hi Andrew:&lt;BR/&gt;&lt;BR/&gt;We've posted a response to a couple more frequently asked questions to the &lt;A HREF="http://www.natlib.govt.nz/about-us/news/20-october-2008-web-harvest-faqs" REL="nofollow"&gt;FAQ&lt;/A&gt;, including explaining why the harvester is off-shore and why we didn't notify webmasters in advance. I think these are both areas for improvement should we do another harvest.&lt;BR/&gt;&lt;BR/&gt;Gordon</content><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7346520062335584992/6536862248220855389/comments/default/7141452066490174544'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7346520062335584992/6536862248220855389/comments/default/7141452066490174544'/><link rel='alternate' type='text/html' href='http://librarytechnz.natlib.govt.nz/2008/10/2008-web-harvest-let-us-know-how-we-can.html?showComment=1224551700000#c7141452066490174544' title=''/><author><name>Gordon Paynter</name><uri>http://www.blogger.com/profile/13375515204887559709</uri><email>noreply@blogger.com</email><gd:image xmlns:gd='http://schemas.google.com/g/2005' rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:in-reply-to xmlns:thr='http://purl.org/syndication/thread/1.0' href='http://librarytechnz.natlib.govt.nz/2008/10/2008-web-harvest-let-us-know-how-we-can.html' ref='tag:blogger.com,1999:blog-7346520062335584992.post-6536862248220855389' source='http://www.blogger.com/feeds/7346520062335584992/posts/default/6536862248220855389' type='text/html'/><gd:extendedProperty xmlns:gd='http://schemas.google.com/g/2005' name='blogger.itemClass' value='pid-808786521'/></entry><entry><id>tag:blogger.com,1999:blog-7346520062335584992.post-2954756489731746980</id><published>2008-10-21T14:12:00.000+13:00</published><updated>2008-10-21T14:12:00.000+13:00</updated><title type='text'>Sigh... If they crawl more than a couple of GB fro...</title><content type='html'>Sigh... If they crawl more than a couple of GB from my site, or if they take down the server, they will be receiving an invoice from me for the international bandwidth used and for loss of business. Robots.txt is there for a reason!!!</content><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7346520062335584992/6536862248220855389/comments/default/2954756489731746980'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7346520062335584992/6536862248220855389/comments/default/2954756489731746980'/><link rel='alternate' type='text/html' href='http://librarytechnz.natlib.govt.nz/2008/10/2008-web-harvest-let-us-know-how-we-can.html?showComment=1224551520000#c2954756489731746980' title=''/><author><name>Jason Franks</name><email>noreply@blogger.com</email><gd:image xmlns:gd='http://schemas.google.com/g/2005' rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img1.blogblog.com/img/blank.gif'/></author><thr:in-reply-to xmlns:thr='http://purl.org/syndication/thread/1.0' href='http://librarytechnz.natlib.govt.nz/2008/10/2008-web-harvest-let-us-know-how-we-can.html' ref='tag:blogger.com,1999:blog-7346520062335584992.post-6536862248220855389' source='http://www.blogger.com/feeds/7346520062335584992/posts/default/6536862248220855389' type='text/html'/><gd:extendedProperty xmlns:gd='http://schemas.google.com/g/2005' name='blogger.itemClass' value='pid-81990382'/></entry><entry><id>tag:blogger.com,1999:blog-7346520062335584992.post-3898145631028847859</id><published>2008-10-21T14:09:00.000+13:00</published><updated>2008-10-21T14:09:00.000+13:00</updated><title type='text'>Hi anonymous:&lt;br&gt;&lt;br&gt;You note that &lt;i&gt;"the Interne...</title><content type='html'>Hi anonymous:&lt;BR/&gt;&lt;BR/&gt;You note that &lt;I&gt;"the Internet Archives Wayback Index) to look up the names of hosts that are physically in New Zealand"&lt;/I&gt; does not seem possible... and its not.&lt;BR/&gt;&lt;BR/&gt;Something got mixed up, and this is meant to refer to &lt;I&gt;the Alexa web search API&lt;/I&gt;. We'll update the page.&lt;BR/&gt;&lt;BR/&gt;Gordon</content><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7346520062335584992/6536862248220855389/comments/default/3898145631028847859'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7346520062335584992/6536862248220855389/comments/default/3898145631028847859'/><link rel='alternate' type='text/html' href='http://librarytechnz.natlib.govt.nz/2008/10/2008-web-harvest-let-us-know-how-we-can.html?showComment=1224551340000#c3898145631028847859' title=''/><author><name>Gordon Paynter</name><uri>http://www.blogger.com/profile/13375515204887559709</uri><email>noreply@blogger.com</email><gd:image xmlns:gd='http://schemas.google.com/g/2005' rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:in-reply-to xmlns:thr='http://purl.org/syndication/thread/1.0' href='http://librarytechnz.natlib.govt.nz/2008/10/2008-web-harvest-let-us-know-how-we-can.html' ref='tag:blogger.com,1999:blog-7346520062335584992.post-6536862248220855389' source='http://www.blogger.com/feeds/7346520062335584992/posts/default/6536862248220855389' type='text/html'/><gd:extendedProperty xmlns:gd='http://schemas.google.com/g/2005' name='blogger.itemClass' value='pid-808786521'/></entry><entry><id>tag:blogger.com,1999:blog-7346520062335584992.post-6393842272746779840</id><published>2008-10-21T10:47:00.000+13:00</published><updated>2008-10-21T10:47:00.000+13:00</updated><title type='text'>@boris - aha, there was discussion ... darn, misse...</title><content type='html'>@boris - aha, there was discussion ... darn, missed it at the time - did anyone submit?</content><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7346520062335584992/6536862248220855389/comments/default/6393842272746779840'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7346520062335584992/6536862248220855389/comments/default/6393842272746779840'/><link rel='alternate' type='text/html' href='http://librarytechnz.natlib.govt.nz/2008/10/2008-web-harvest-let-us-know-how-we-can.html?showComment=1224539220000#c6393842272746779840' title=''/><author><name>Mike Riversdale</name><uri>http://www.blogger.com/profile/00112999693425305730</uri><email>noreply@blogger.com</email><gd:image xmlns:gd='http://schemas.google.com/g/2005' rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='24' src='http://3.bp.blogspot.com/_LB2G41LWFXg/SNgp5teiCWI/AAAAAAAAAMQ/NOmZTBIzSAA/s1600-R/2841083599_b09ba8c507.jpg%3Fv%3D0'/></author><thr:in-reply-to xmlns:thr='http://purl.org/syndication/thread/1.0' href='http://librarytechnz.natlib.govt.nz/2008/10/2008-web-harvest-let-us-know-how-we-can.html' ref='tag:blogger.com,1999:blog-7346520062335584992.post-6536862248220855389' source='http://www.blogger.com/feeds/7346520062335584992/posts/default/6536862248220855389' type='text/html'/><gd:extendedProperty xmlns:gd='http://schemas.google.com/g/2005' name='blogger.itemClass' value='pid-795015503'/></entry><entry><id>tag:blogger.com,1999:blog-7346520062335584992.post-7226368280840527728</id><published>2008-10-21T10:45:00.000+13:00</published><updated>2008-10-21T10:45:00.000+13:00</updated><title type='text'>Thanks for being a part of this obviously thorny d...</title><content type='html'>Thanks for being a part of this obviously thorny discussion and for adding to the FAQ.&lt;BR/&gt;&lt;BR/&gt;When you guys and gals come to do the next crawl maybe some prior discussion and heads-up would alleviate some of the slightly bad feeling being generated. Working together is always better than finding out after the event.</content><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7346520062335584992/6536862248220855389/comments/default/7226368280840527728'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7346520062335584992/6536862248220855389/comments/default/7226368280840527728'/><link rel='alternate' type='text/html' href='http://librarytechnz.natlib.govt.nz/2008/10/2008-web-harvest-let-us-know-how-we-can.html?showComment=1224539100000#c7226368280840527728' title=''/><author><name>Mike Riversdale</name><uri>http://www.blogger.com/profile/00112999693425305730</uri><email>noreply@blogger.com</email><gd:image xmlns:gd='http://schemas.google.com/g/2005' rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='24' src='http://3.bp.blogspot.com/_LB2G41LWFXg/SNgp5teiCWI/AAAAAAAAAMQ/NOmZTBIzSAA/s1600-R/2841083599_b09ba8c507.jpg%3Fv%3D0'/></author><thr:in-reply-to xmlns:thr='http://purl.org/syndication/thread/1.0' href='http://librarytechnz.natlib.govt.nz/2008/10/2008-web-harvest-let-us-know-how-we-can.html' ref='tag:blogger.com,1999:blog-7346520062335584992.post-6536862248220855389' source='http://www.blogger.com/feeds/7346520062335584992/posts/default/6536862248220855389' type='text/html'/><gd:extendedProperty xmlns:gd='http://schemas.google.com/g/2005' name='blogger.itemClass' value='pid-795015503'/></entry><entry><id>tag:blogger.com,1999:blog-7346520062335584992.post-6069622475521642848</id><published>2008-10-20T21:41:00.000+13:00</published><updated>2008-10-20T21:41:00.000+13:00</updated><title type='text'>It's very disappointing that National Library is c...</title><content type='html'>It's very disappointing that National Library is choosing to ignore good internet citizenship in order to achieve its ends. It's disingenuous to ask "how we can make it better for you" when electronic documents are being collected with the latent threat of a $5,000 fine for any publisher that does not comply with making the documents available (see s40 of the National Library Act 2003, http://legislation.govt.nz/act/public/2003/0019/latest/DLM192266.html?search=ts_all%40act%40bill%40regulation_national+library ).&lt;BR/&gt;&lt;BR/&gt;I'll co-operate because I can't afford that fine, but don't expect me to like it. I note that the Internet Archive, to their credit, honours robots.txt (http://www.archive.org/about/exclude.php )</content><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7346520062335584992/6536862248220855389/comments/default/6069622475521642848'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7346520062335584992/6536862248220855389/comments/default/6069622475521642848'/><link rel='alternate' type='text/html' href='http://librarytechnz.natlib.govt.nz/2008/10/2008-web-harvest-let-us-know-how-we-can.html?showComment=1224492060000#c6069622475521642848' title=''/><author><name>Anonymous</name><email>noreply@blogger.com</email><gd:image xmlns:gd='http://schemas.google.com/g/2005' rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img1.blogblog.com/img/blank.gif'/></author><thr:in-reply-to xmlns:thr='http://purl.org/syndication/thread/1.0' href='http://librarytechnz.natlib.govt.nz/2008/10/2008-web-harvest-let-us-know-how-we-can.html' ref='tag:blogger.com,1999:blog-7346520062335584992.post-6536862248220855389' source='http://www.blogger.com/feeds/7346520062335584992/posts/default/6536862248220855389' type='text/html'/><gd:extendedProperty xmlns:gd='http://schemas.google.com/g/2005' name='blogger.itemClass' value='pid-1210490438'/></entry><entry><id>tag:blogger.com,1999:blog-7346520062335584992.post-6563788126711203152</id><published>2008-10-20T21:14:00.000+13:00</published><updated>2008-10-20T21:14:00.000+13:00</updated><title type='text'>I guess we must remember that NLNZ has the legal r...</title><content type='html'>I guess we must remember that NLNZ has the legal right to collect your site - in full&lt;BR/&gt;&lt;BR/&gt;You had a chance to have a conversation with them about that in the consulation that occured about &lt;B&gt;National Library of New Zealand (Te Puna Mātauranga o Aotearoa) Act 2003&lt;/B&gt;  (see &lt;A HREF="http://www.legislation.govt.nz/act/public/2003/0019/latest/whole.html?search=ts_act_National+Library+of+New+Zealand+(Te+Puna+M%C4%81tauranga+o+Aotearoa)+Act+2003#DLM191962" REL="nofollow"&gt;http://www.legislation.govt.nz/act/public/2003/0019/latest/whole.html?search=ts_act_National+Library+of+New+Zealand+(Te+Puna+M%C4%81tauranga+o+Aotearoa)+Act+2003#DLM191962&lt;/A&gt;&lt;BR/&gt;&lt;BR/&gt;I guess the conversations and submissions are buried in a vault - but sometime will be unveiled along with the various harvests that the nlnz is doing</content><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7346520062335584992/6536862248220855389/comments/default/6563788126711203152'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7346520062335584992/6536862248220855389/comments/default/6563788126711203152'/><link rel='alternate' type='text/html' href='http://librarytechnz.natlib.govt.nz/2008/10/2008-web-harvest-let-us-know-how-we-can.html?showComment=1224490440000#c6563788126711203152' title=''/><author><name>Boris</name><email>noreply@blogger.com</email><gd:image xmlns:gd='http://schemas.google.com/g/2005' rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img1.blogblog.com/img/blank.gif'/></author><thr:in-reply-to xmlns:thr='http://purl.org/syndication/thread/1.0' href='http://librarytechnz.natlib.govt.nz/2008/10/2008-web-harvest-let-us-know-how-we-can.html' ref='tag:blogger.com,1999:blog-7346520062335584992.post-6536862248220855389' source='http://www.blogger.com/feeds/7346520062335584992/posts/default/6536862248220855389' type='text/html'/><gd:extendedProperty xmlns:gd='http://schemas.google.com/g/2005' name='blogger.itemClass' value='pid-1176063810'/></entry><entry><id>tag:blogger.com,1999:blog-7346520062335584992.post-190920027560443834</id><published>2008-10-20T18:59:00.000+13:00</published><updated>2008-10-20T18:59:00.000+13:00</updated><title type='text'>If you're picking on sites physically hosted in NZ...</title><content type='html'>If you're picking on sites physically hosted in NZ then scraping them from a source address that was also within New Zealand would make it a lot easier for some of us.  Not to mention cheaper.&lt;BR/&gt;&lt;BR/&gt;I know that National Library has a Citylink connection.  I can't understand why this is not using it?  Why are you exporting all of our content to San Jose, and charging us for that?&lt;BR/&gt;&lt;BR/&gt;Cheers,&lt;BR/&gt;Andrew.</content><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7346520062335584992/6536862248220855389/comments/default/190920027560443834'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7346520062335584992/6536862248220855389/comments/default/190920027560443834'/><link rel='alternate' type='text/html' href='http://librarytechnz.natlib.govt.nz/2008/10/2008-web-harvest-let-us-know-how-we-can.html?showComment=1224482340000#c190920027560443834' title=''/><author><name>Andrew McMillan</name><uri>http://andrew.mcmillan.net.nz/</uri><email>noreply@blogger.com</email><gd:image xmlns:gd='http://schemas.google.com/g/2005' rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img1.blogblog.com/img/blank.gif'/></author><thr:in-reply-to xmlns:thr='http://purl.org/syndication/thread/1.0' href='http://librarytechnz.natlib.govt.nz/2008/10/2008-web-harvest-let-us-know-how-we-can.html' ref='tag:blogger.com,1999:blog-7346520062335584992.post-6536862248220855389' source='http://www.blogger.com/feeds/7346520062335584992/posts/default/6536862248220855389' type='text/html'/><gd:extendedProperty xmlns:gd='http://schemas.google.com/g/2005' name='blogger.itemClass' value='pid-455083223'/></entry><entry><id>tag:blogger.com,1999:blog-7346520062335584992.post-4509320290200439658</id><published>2008-10-20T17:44:00.000+13:00</published><updated>2008-10-20T17:44:00.000+13:00</updated><title type='text'>October 16, 2008 at 9:12 PM, Tim Snadden said... &lt;...</title><content type='html'>October 16, 2008 at 9:12 PM, Tim Snadden said... &lt;BR/&gt;"&lt;I&gt;You say 'let us know, we'll make it stop'. By creating a robots.txt file the site owner *has* let you know and you are ignoring their wishes.&lt;/I&gt;"&lt;BR/&gt;&lt;BR/&gt;As a web publisher myself, I whole-heartedly agree with this viewpoint. The "Robots.txt" file is there to control release of information, prevent direct grabbing of pages and files NOT for general distribution and indexing, and basically to let 'agents' know what they are allowed to grab and what they are required to leave alone.&lt;BR/&gt;&lt;BR/&gt;I for one will be blocking your harvesting bot directly, even if it means turning down your 'offer' of posterity.&lt;BR/&gt;&lt;BR/&gt;Also, it would be prudent to allow the 'harvestees' to request that you delete any harvested content, since it was been scraped without implicit consent.&lt;BR/&gt;&lt;BR/&gt;Thank you for bringing to light your utter disregard of our wishes.</content><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7346520062335584992/6536862248220855389/comments/default/4509320290200439658'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7346520062335584992/6536862248220855389/comments/default/4509320290200439658'/><link rel='alternate' type='text/html' href='http://librarytechnz.natlib.govt.nz/2008/10/2008-web-harvest-let-us-know-how-we-can.html?showComment=1224477840000#c4509320290200439658' title=''/><author><name>Anonymous</name><email>noreply@blogger.com</email><gd:image xmlns:gd='http://schemas.google.com/g/2005' rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img1.blogblog.com/img/blank.gif'/></author><thr:in-reply-to xmlns:thr='http://purl.org/syndication/thread/1.0' href='http://librarytechnz.natlib.govt.nz/2008/10/2008-web-harvest-let-us-know-how-we-can.html' ref='tag:blogger.com,1999:blog-7346520062335584992.post-6536862248220855389' source='http://www.blogger.com/feeds/7346520062335584992/posts/default/6536862248220855389' type='text/html'/><gd:extendedProperty xmlns:gd='http://schemas.google.com/g/2005' name='blogger.itemClass' value='pid-1859780861'/></entry><entry><id>tag:blogger.com,1999:blog-7346520062335584992.post-5258528244175839189</id><published>2008-10-20T17:05:00.000+13:00</published><updated>2008-10-20T17:05:00.000+13:00</updated><title type='text'>2. The crawl engineers used several available serv...</title><content type='html'>2. The crawl engineers used several available services (eg the Internet Archives Wayback Index) to look up the names of hosts that are physically in New Zealand but not registered in the nz domain.&lt;BR/&gt;&lt;BR/&gt;You state this, but in my mind this doesn't actually sound possible.&lt;BR/&gt;&lt;BR/&gt;How can the internet archives wayback index tell you if a host is in New Zealand or not?</content><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7346520062335584992/6536862248220855389/comments/default/5258528244175839189'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7346520062335584992/6536862248220855389/comments/default/5258528244175839189'/><link rel='alternate' type='text/html' href='http://librarytechnz.natlib.govt.nz/2008/10/2008-web-harvest-let-us-know-how-we-can.html?showComment=1224475500000#c5258528244175839189' title=''/><author><name>Anonymous</name><email>noreply@blogger.com</email><gd:image xmlns:gd='http://schemas.google.com/g/2005' rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img1.blogblog.com/img/blank.gif'/></author><thr:in-reply-to xmlns:thr='http://purl.org/syndication/thread/1.0' href='http://librarytechnz.natlib.govt.nz/2008/10/2008-web-harvest-let-us-know-how-we-can.html' ref='tag:blogger.com,1999:blog-7346520062335584992.post-6536862248220855389' source='http://www.blogger.com/feeds/7346520062335584992/posts/default/6536862248220855389' type='text/html'/><gd:extendedProperty xmlns:gd='http://schemas.google.com/g/2005' name='blogger.itemClass' value='pid-2087689096'/></entry><entry><id>tag:blogger.com,1999:blog-7346520062335584992.post-7166103803972226084</id><published>2008-10-20T16:55:00.000+13:00</published><updated>2008-10-20T16:55:00.000+13:00</updated><title type='text'>Link to the &lt;a href="http://www.natlib.govt.nz/abo...</title><content type='html'>Link to the &lt;A HREF="http://www.natlib.govt.nz/about-us/news/20-october-2008-web-harvest-faqs" REL="nofollow"&gt;Web Harvest FAQs&lt;/A&gt;.</content><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7346520062335584992/6536862248220855389/comments/default/7166103803972226084'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7346520062335584992/6536862248220855389/comments/default/7166103803972226084'/><link rel='alternate' type='text/html' href='http://librarytechnz.natlib.govt.nz/2008/10/2008-web-harvest-let-us-know-how-we-can.html?showComment=1224474900000#c7166103803972226084' title=''/><author><name>Courtney Johnston</name><uri>http://www.blogger.com/profile/13465703476413455843</uri><email>noreply@blogger.com</email><gd:image xmlns:gd='http://schemas.google.com/g/2005' rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:in-reply-to xmlns:thr='http://purl.org/syndication/thread/1.0' href='http://librarytechnz.natlib.govt.nz/2008/10/2008-web-harvest-let-us-know-how-we-can.html' ref='tag:blogger.com,1999:blog-7346520062335584992.post-6536862248220855389' source='http://www.blogger.com/feeds/7346520062335584992/posts/default/6536862248220855389' type='text/html'/><gd:extendedProperty xmlns:gd='http://schemas.google.com/g/2005' name='blogger.itemClass' value='pid-1293390714'/></entry><entry><id>tag:blogger.com,1999:blog-7346520062335584992.post-8709292228243134415</id><published>2008-10-20T16:32:00.001+13:00</published><updated>2008-10-20T16:32:00.001+13:00</updated><title type='text'>Hi Dave: &lt;br&gt;&lt;br&gt;I have also answered your questio...</title><content type='html'>Hi Dave: &lt;BR/&gt;&lt;BR/&gt;I have also answered your question about very large sites directly in the FAQ. To provide a little more detail, we are trying to make the crawl broad rather than deep, so if your website is very large, then the chances are we won’t capture it all. &lt;BR/&gt;&lt;BR/&gt;However, we don’t yet know how deep we can get into large websites.  Consider this: we’re currently aiming to harvest 100 million URLs from just over 300,000 hosts. On the face of it that’s an average of 333 URLs per host. However, a lot of hosts will be empty, or redirects, or small. Here’s another case study though: in a recent domain harvest the BNF (French National Library) about half the .fr domains had 10 URLs or less, and only about 0.04% were crawled beyond 10,000 URLs (see “Legal deposit of the French Web” on http://iwaw.net/08/). It therefore seems unlikely we will be harvesting 10s of millions of URLs from your servers.&lt;BR/&gt;&lt;BR/&gt;Gordon</content><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7346520062335584992/6536862248220855389/comments/default/8709292228243134415'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7346520062335584992/6536862248220855389/comments/default/8709292228243134415'/><link rel='alternate' type='text/html' href='http://librarytechnz.natlib.govt.nz/2008/10/2008-web-harvest-let-us-know-how-we-can.html?showComment=1224473520001#c8709292228243134415' title=''/><author><name>Gordon Paynter</name><uri>http://www.blogger.com/profile/13375515204887559709</uri><email>noreply@blogger.com</email><gd:image xmlns:gd='http://schemas.google.com/g/2005' rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:in-reply-to xmlns:thr='http://purl.org/syndication/thread/1.0' href='http://librarytechnz.natlib.govt.nz/2008/10/2008-web-harvest-let-us-know-how-we-can.html' ref='tag:blogger.com,1999:blog-7346520062335584992.post-6536862248220855389' source='http://www.blogger.com/feeds/7346520062335584992/posts/default/6536862248220855389' type='text/html'/><gd:extendedProperty xmlns:gd='http://schemas.google.com/g/2005' name='blogger.itemClass' value='pid-808786521'/></entry><entry><id>tag:blogger.com,1999:blog-7346520062335584992.post-5842279928343881056</id><published>2008-10-20T16:32:00.000+13:00</published><updated>2008-10-20T16:32:00.000+13:00</updated><title type='text'>Hi Mike and Mike:&lt;br&gt;&lt;br&gt;I’ve added your questions...</title><content type='html'>Hi Mike and Mike:&lt;BR/&gt;&lt;BR/&gt;I’ve added your questions (about where the list of names comes from, and how we find websites hosted inside and outside New Zealand) to a FAQ page, which you can find here:&lt;BR/&gt;http://www.natlib.govt.nz/about-us/news/20-october-2008-web-harvest-faqs&lt;BR/&gt;&lt;BR/&gt;It’s all a bit complex, but I hope that covers it off.&lt;BR/&gt;&lt;BR/&gt;Gordon</content><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7346520062335584992/6536862248220855389/comments/default/5842279928343881056'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7346520062335584992/6536862248220855389/comments/default/5842279928343881056'/><link rel='alternate' type='text/html' href='http://librarytechnz.natlib.govt.nz/2008/10/2008-web-harvest-let-us-know-how-we-can.html?showComment=1224473520000#c5842279928343881056' title=''/><author><name>Gordon Paynter</name><uri>http://www.blogger.com/profile/13375515204887559709</uri><email>noreply@blogger.com</email><gd:image xmlns:gd='http://schemas.google.com/g/2005' rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:in-reply-to xmlns:thr='http://purl.org/syndication/thread/1.0' href='http://librarytechnz.natlib.govt.nz/2008/10/2008-web-harvest-let-us-know-how-we-can.html' ref='tag:blogger.com,1999:blog-7346520062335584992.post-6536862248220855389' source='http://www.blogger.com/feeds/7346520062335584992/posts/default/6536862248220855389' type='text/html'/><gd:extendedProperty xmlns:gd='http://schemas.google.com/g/2005' name='blogger.itemClass' value='pid-808786521'/></entry><entry><id>tag:blogger.com,1999:blog-7346520062335584992.post-7736817349249544109</id><published>2008-10-18T03:11:00.000+13:00</published><updated>2008-10-18T03:11:00.000+13:00</updated><title type='text'>interesting the non response to issues raised here...</title><content type='html'>interesting the non response to issues raised here</content><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7346520062335584992/6536862248220855389/comments/default/7736817349249544109'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7346520062335584992/6536862248220855389/comments/default/7736817349249544109'/><link rel='alternate' type='text/html' href='http://librarytechnz.natlib.govt.nz/2008/10/2008-web-harvest-let-us-know-how-we-can.html?showComment=1224252660000#c7736817349249544109' title=''/><author><name>I see aship in the harbour</name><email>noreply@blogger.com</email><gd:image xmlns:gd='http://schemas.google.com/g/2005' rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img1.blogblog.com/img/blank.gif'/></author><thr:in-reply-to xmlns:thr='http://purl.org/syndication/thread/1.0' href='http://librarytechnz.natlib.govt.nz/2008/10/2008-web-harvest-let-us-know-how-we-can.html' ref='tag:blogger.com,1999:blog-7346520062335584992.post-6536862248220855389' source='http://www.blogger.com/feeds/7346520062335584992/posts/default/6536862248220855389' type='text/html'/><gd:extendedProperty xmlns:gd='http://schemas.google.com/g/2005' name='blogger.itemClass' value='pid-2105950392'/></entry><entry><id>tag:blogger.com,1999:blog-7346520062335584992.post-8230174373327032227</id><published>2008-10-17T09:04:00.000+13:00</published><updated>2008-10-17T09:04:00.000+13:00</updated><title type='text'>I don't think trying to apply a paper model to the...</title><content type='html'>I don't think trying to apply a paper model to the internet is practical.&lt;BR/&gt;&lt;BR/&gt;I run several large NZ websites containing literally tens of millions of indexable pages and unknown quantities of dynamic pages (search results etc).  Thousands of pages change daily and total content is several hundred gigabyte, and lot of that is video and imagery.&lt;BR/&gt;&lt;BR/&gt;Do you intend to download all of the content from all of my sites?  (they are all withing the .co.nz)</content><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7346520062335584992/6536862248220855389/comments/default/8230174373327032227'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7346520062335584992/6536862248220855389/comments/default/8230174373327032227'/><link rel='alternate' type='text/html' href='http://librarytechnz.natlib.govt.nz/2008/10/2008-web-harvest-let-us-know-how-we-can.html?showComment=1224187440000#c8230174373327032227' title=''/><author><name>dave</name><uri>http://www.blogger.com/profile/05654861944179220597</uri><email>noreply@blogger.com</email><gd:image xmlns:gd='http://schemas.google.com/g/2005' rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:in-reply-to xmlns:thr='http://purl.org/syndication/thread/1.0' href='http://librarytechnz.natlib.govt.nz/2008/10/2008-web-harvest-let-us-know-how-we-can.html' ref='tag:blogger.com,1999:blog-7346520062335584992.post-6536862248220855389' source='http://www.blogger.com/feeds/7346520062335584992/posts/default/6536862248220855389' type='text/html'/><gd:extendedProperty xmlns:gd='http://schemas.google.com/g/2005' name='blogger.itemClass' value='pid-2051737578'/></entry><entry><id>tag:blogger.com,1999:blog-7346520062335584992.post-1097387439048779328</id><published>2008-10-16T22:06:00.000+13:00</published><updated>2008-10-16T22:06:00.000+13:00</updated><title type='text'>Gordon's left for the day, but he'll post a reply ...</title><content type='html'>Gordon's left for the day, but he'll post a reply to your question tomorrow morning.&lt;BR/&gt;&lt;BR/&gt;maybe Gordon could get the internet at home</content><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7346520062335584992/6536862248220855389/comments/default/1097387439048779328'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7346520062335584992/6536862248220855389/comments/default/1097387439048779328'/><link rel='alternate' type='text/html' href='http://librarytechnz.natlib.govt.nz/2008/10/2008-web-harvest-let-us-know-how-we-can.html?showComment=1224147960000#c1097387439048779328' title=''/><author><name>no sleep till midnight</name><email>noreply@blogger.com</email><gd:image xmlns:gd='http://schemas.google.com/g/2005' rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img1.blogblog.com/img/blank.gif'/></author><thr:in-reply-to xmlns:thr='http://purl.org/syndication/thread/1.0' href='http://librarytechnz.natlib.govt.nz/2008/10/2008-web-harvest-let-us-know-how-we-can.html' ref='tag:blogger.com,1999:blog-7346520062335584992.post-6536862248220855389' source='http://www.blogger.com/feeds/7346520062335584992/posts/default/6536862248220855389' type='text/html'/><gd:extendedProperty xmlns:gd='http://schemas.google.com/g/2005' name='blogger.itemClass' value='pid-1424524736'/></entry><entry><id>tag:blogger.com,1999:blog-7346520062335584992.post-8511791236420625012</id><published>2008-10-16T21:12:00.000+13:00</published><updated>2008-10-16T21:12:00.000+13:00</updated><title type='text'>You say 'let us know, we'll make it stop'. By crea...</title><content type='html'>You say 'let us know, we'll make it stop'. By creating a robots.txt file the site owner *has* let you know and you are ignoring their wishes. Bad decision, bad net citizen.</content><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7346520062335584992/6536862248220855389/comments/default/8511791236420625012'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7346520062335584992/6536862248220855389/comments/default/8511791236420625012'/><link rel='alternate' type='text/html' href='http://librarytechnz.natlib.govt.nz/2008/10/2008-web-harvest-let-us-know-how-we-can.html?showComment=1224144720000#c8511791236420625012' title=''/><author><name>Tim Snadden</name><email>noreply@blogger.com</email><gd:image xmlns:gd='http://schemas.google.com/g/2005' rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img1.blogblog.com/img/blank.gif'/></author><thr:in-reply-to xmlns:thr='http://purl.org/syndication/thread/1.0' href='http://librarytechnz.natlib.govt.nz/2008/10/2008-web-harvest-let-us-know-how-we-can.html' ref='tag:blogger.com,1999:blog-7346520062335584992.post-6536862248220855389' source='http://www.blogger.com/feeds/7346520062335584992/posts/default/6536862248220855389' type='text/html'/><gd:extendedProperty xmlns:gd='http://schemas.google.com/g/2005' name='blogger.itemClass' value='pid-14892380'/></entry><entry><id>tag:blogger.com,1999:blog-7346520062335584992.post-917021729962528287</id><published>2008-10-16T20:01:00.000+13:00</published><updated>2008-10-16T20:01:00.000+13:00</updated><title type='text'>I think Mike F has asked the question I was trying...</title><content type='html'>I think Mike F has asked the question I was trying to ask - for instance my work site (http://www.miramarmike.co.nz) is actually a Google hosted platform in (I assume) the US. And the opposite probably applies - international content hosted in NZ - but on a smaller scale.&lt;BR/&gt;&lt;BR/&gt;And, of course, NZ content that's on *any* site in the world - Flickr immediately springs to mind (discussions as well as the actual photos)&lt;BR/&gt;&lt;BR/&gt;Back to "putting it onto the Web is legally publication", I'm not sure that answers the question. If the rule is do not index this (using robots.txt) then I don't see how "it's published" changes it. This isn't books, this is the web and the rule (de fact, understood, community) rule still applies.&lt;BR/&gt;&lt;BR/&gt;But hey, I also understand you've a civic duty to perform and appreciate the dilemma and that you're open to talking about it.</content><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7346520062335584992/6536862248220855389/comments/default/917021729962528287'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7346520062335584992/6536862248220855389/comments/default/917021729962528287'/><link rel='alternate' type='text/html' href='http://librarytechnz.natlib.govt.nz/2008/10/2008-web-harvest-let-us-know-how-we-can.html?showComment=1224140460000#c917021729962528287' title=''/><author><name>Mike Riversdale</name><uri>http://www.blogger.com/profile/00112999693425305730</uri><email>noreply@blogger.com</email><gd:image xmlns:gd='http://schemas.google.com/g/2005' rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='24' src='http://3.bp.blogspot.com/_LB2G41LWFXg/SNgp5teiCWI/AAAAAAAAAMQ/NOmZTBIzSAA/s1600-R/2841083599_b09ba8c507.jpg%3Fv%3D0'/></author><thr:in-reply-to xmlns:thr='http://purl.org/syndication/thread/1.0' href='http://librarytechnz.natlib.govt.nz/2008/10/2008-web-harvest-let-us-know-how-we-can.html' ref='tag:blogger.com,1999:blog-7346520062335584992.post-6536862248220855389' source='http://www.blogger.com/feeds/7346520062335584992/posts/default/6536862248220855389' type='text/html'/><gd:extendedProperty xmlns:gd='http://schemas.google.com/g/2005' name='blogger.itemClass' value='pid-795015503'/></entry><entry><id>tag:blogger.com,1999:blog-7346520062335584992.post-1690754295325473986</id><published>2008-10-16T17:26:00.000+13:00</published><updated>2008-10-16T17:26:00.000+13:00</updated><title type='text'>Well, either.&lt;br&gt;&lt;br&gt;I guess i'm wondering &lt;br&gt;a) ...</title><content type='html'>Well, either.&lt;BR/&gt;&lt;BR/&gt;I guess i'm wondering &lt;BR/&gt;a) how you're getting the list of names to harvest, &lt;BR/&gt;and &lt;BR/&gt;b) how you tell the difference between sites hosted in new zealand, and those hosted overseas with .nz domain names.</content><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7346520062335584992/6536862248220855389/comments/default/1690754295325473986'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7346520062335584992/6536862248220855389/comments/default/1690754295325473986'/><link rel='alternate' type='text/html' href='http://librarytechnz.natlib.govt.nz/2008/10/2008-web-harvest-let-us-know-how-we-can.html?showComment=1224131160000#c1690754295325473986' title=''/><author><name>mike forbes</name><uri>http://www.blogger.com/profile/10429321058652694673</uri><email>noreply@blogger.com</email><gd:image xmlns:gd='http://schemas.google.com/g/2005' rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:in-reply-to xmlns:thr='http://purl.org/syndication/thread/1.0' href='http://librarytechnz.natlib.govt.nz/2008/10/2008-web-harvest-let-us-know-how-we-can.html' ref='tag:blogger.com,1999:blog-7346520062335584992.post-6536862248220855389' source='http://www.blogger.com/feeds/7346520062335584992/posts/default/6536862248220855389' type='text/html'/><gd:extendedProperty xmlns:gd='http://schemas.google.com/g/2005' name='blogger.itemClass' value='pid-556059088'/></entry><entry><id>tag:blogger.com,1999:blog-7346520062335584992.post-1334900880720889747</id><published>2008-10-16T17:22:00.000+13:00</published><updated>2008-10-16T17:22:00.000+13:00</updated><title type='text'>hey Mike&lt;br&gt;&lt;br&gt;Gordon's left for the day, but he'...</title><content type='html'>hey Mike&lt;BR/&gt;&lt;BR/&gt;Gordon's left for the day, but he'll post a reply to your question tomorrow morning. Just to clarify - the list of domains; do you mean all the ones that are being harvested, or the not-hosted-in-NZ ones?</content><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7346520062335584992/6536862248220855389/comments/default/1334900880720889747'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7346520062335584992/6536862248220855389/comments/default/1334900880720889747'/><link rel='alternate' type='text/html' href='http://librarytechnz.natlib.govt.nz/2008/10/2008-web-harvest-let-us-know-how-we-can.html?showComment=1224130920000#c1334900880720889747' title=''/><author><name>Courtney Johnston</name><uri>http://www.blogger.com/profile/13465703476413455843</uri><email>noreply@blogger.com</email><gd:image xmlns:gd='http://schemas.google.com/g/2005' rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:in-reply-to xmlns:thr='http://purl.org/syndication/thread/1.0' href='http://librarytechnz.natlib.govt.nz/2008/10/2008-web-harvest-let-us-know-how-we-can.html' ref='tag:blogger.com,1999:blog-7346520062335584992.post-6536862248220855389' source='http://www.blogger.com/feeds/7346520062335584992/posts/default/6536862248220855389' type='text/html'/><gd:extendedProperty xmlns:gd='http://schemas.google.com/g/2005' name='blogger.itemClass' value='pid-1293390714'/></entry><entry><id>tag:blogger.com,1999:blog-7346520062335584992.post-8382126856314521939</id><published>2008-10-16T17:14:00.000+13:00</published><updated>2008-10-16T17:14:00.000+13:00</updated><title type='text'>I'm curious as to how you are collecting these dom...</title><content type='html'>I'm curious as to how you are collecting these domains..&lt;BR/&gt;&lt;BR/&gt;where are you getting the list from?</content><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7346520062335584992/6536862248220855389/comments/default/8382126856314521939'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7346520062335584992/6536862248220855389/comments/default/8382126856314521939'/><link rel='alternate' type='text/html' href='http://librarytechnz.natlib.govt.nz/2008/10/2008-web-harvest-let-us-know-how-we-can.html?showComment=1224130440000#c8382126856314521939' title=''/><author><name>mike forbes</name><uri>http://www.blogger.com/profile/10429321058652694673</uri><email>noreply@blogger.com</email><gd:image xmlns:gd='http://schemas.google.com/g/2005' rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:in-reply-to xmlns:thr='http://purl.org/syndication/thread/1.0' href='http://librarytechnz.natlib.govt.nz/2008/10/2008-web-harvest-let-us-know-how-we-can.html' ref='tag:blogger.com,1999:blog-7346520062335584992.post-6536862248220855389' source='http://www.blogger.com/feeds/7346520062335584992/posts/default/6536862248220855389' type='text/html'/><gd:extendedProperty xmlns:gd='http://schemas.google.com/g/2005' name='blogger.itemClass' value='pid-556059088'/></entry><entry><id>tag:blogger.com,1999:blog-7346520062335584992.post-6094978572054463375</id><published>2008-10-16T17:03:00.000+13:00</published><updated>2008-10-16T17:03:00.000+13:00</updated><title type='text'>Hi Mike:&lt;br&gt;&lt;br&gt;Yes, in many cases it is similar t...</title><content type='html'>Hi Mike:&lt;BR/&gt;&lt;BR/&gt;Yes, in many cases it is similar to collecting people's diaries (national libraries do that too). One of the interesting ethical issues that web harvesting throws up is that legally posting stuff on the web is seen as "publication", the people doing it might think of it more as "communication", and expect some level of privacy. We will have to think about this more before we make the harvest publicly available, but for now our focus is capturing as much at-risk material as we can.&lt;BR/&gt;&lt;BR/&gt;We are gathering a small sample of NZ content that is not hosted in New Zealand, but it is very difficult to detect this reliably in an automated fashion. Our selection is therefore hand-vetted. The Czech national library is doing some neat work on automating this process using whois lookups, and looking for Czech phone numbers, names, language and email addresses on web pages that we might be able to exploit in the future (if we ever do another harvest).&lt;BR/&gt;&lt;BR/&gt;Gordon</content><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7346520062335584992/6536862248220855389/comments/default/6094978572054463375'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7346520062335584992/6536862248220855389/comments/default/6094978572054463375'/><link rel='alternate' type='text/html' href='http://librarytechnz.natlib.govt.nz/2008/10/2008-web-harvest-let-us-know-how-we-can.html?showComment=1224129780000#c6094978572054463375' title=''/><author><name>Gordon Paynter</name><email>noreply@blogger.com</email><gd:image xmlns:gd='http://schemas.google.com/g/2005' rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img1.blogblog.com/img/blank.gif'/></author><thr:in-reply-to xmlns:thr='http://purl.org/syndication/thread/1.0' href='http://librarytechnz.natlib.govt.nz/2008/10/2008-web-harvest-let-us-know-how-we-can.html' ref='tag:blogger.com,1999:blog-7346520062335584992.post-6536862248220855389' source='http://www.blogger.com/feeds/7346520062335584992/posts/default/6536862248220855389' type='text/html'/><gd:extendedProperty xmlns:gd='http://schemas.google.com/g/2005' name='blogger.itemClass' value='pid-560075423'/></entry><entry><id>tag:blogger.com,1999:blog-7346520062335584992.post-5435754642515524362</id><published>2008-10-16T16:36:00.000+13:00</published><updated>2008-10-16T16:36:00.000+13:00</updated><title type='text'>Interesting stance.&lt;br&gt;Isn't that a little like sa...</title><content type='html'>Interesting stance.&lt;BR/&gt;Isn't that a little like saying, "We want to keep a record of people's physcaly diaries and therefore can read over people's shoulders"?&lt;BR/&gt;&lt;BR/&gt;I get and suppotr the intention.&lt;BR/&gt;However by breaking a fundamental rule of the WWW will probably mean you'll get blocked by more direct ways now and in the future.&lt;BR/&gt;&lt;BR/&gt;Oh, and how are you handling NZ content that is not hosted in NZ? Just curious.</content><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7346520062335584992/6536862248220855389/comments/default/5435754642515524362'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7346520062335584992/6536862248220855389/comments/default/5435754642515524362'/><link rel='alternate' type='text/html' href='http://librarytechnz.natlib.govt.nz/2008/10/2008-web-harvest-let-us-know-how-we-can.html?showComment=1224128160000#c5435754642515524362' title=''/><author><name>Mike Riversdale</name><uri>http://www.blogger.com/profile/00112999693425305730</uri><email>noreply@blogger.com</email><gd:image xmlns:gd='http://schemas.google.com/g/2005' rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='24' src='http://3.bp.blogspot.com/_LB2G41LWFXg/SNgp5teiCWI/AAAAAAAAAMQ/NOmZTBIzSAA/s1600-R/2841083599_b09ba8c507.jpg%3Fv%3D0'/></author><thr:in-reply-to xmlns:thr='http://purl.org/syndication/thread/1.0' href='http://librarytechnz.natlib.govt.nz/2008/10/2008-web-harvest-let-us-know-how-we-can.html' ref='tag:blogger.com,1999:blog-7346520062335584992.post-6536862248220855389' source='http://www.blogger.com/feeds/7346520062335584992/posts/default/6536862248220855389' type='text/html'/><gd:extendedProperty xmlns:gd='http://schemas.google.com/g/2005' name='blogger.itemClass' value='pid-795015503'/></entry></feed>
