<?xml version="1.0" encoding="UTF-8"?><mods xmlns="http://www.loc.gov/mods/v3" xmlns:zs="http://docs.oasis-open.org/ns/search-ws/sruResponse" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" version="3.8" xsi:schemaLocation="http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-8.xsd">
  <titleInfo>
    <title>3000 .gov tabular dataset</title>
  </titleInfo>
  <titleInfo type="alternative">
    <title>Dot gov tabular dataset</title>
  </titleInfo>
  <titleInfo type="alternative">
    <title>Three thousand dot gov tabular dataset</title>
  </titleInfo>
  <name type="corporate">
    <namePart>Library of Congress Web Archiving Program</namePart>
  </name>
  <typeOfResource>software, multimedia</typeOfResource>
  <genre authority="rdacontent">text</genre>
  <genre authority="rdacontent">computer dataset</genre>
  <genre authority="lcgft">Data sets.</genre>
  <genre authority="lcgft">Web archives.</genre>
  <originInfo>
    <place>
      <placeTerm authority="marccountry" type="code">dcu</placeTerm>
    </place>
    <dateIssued encoding="marc">2019</dateIssued>
    <issuance>monographic</issuance>
  </originInfo>
  <originInfo eventType="production">
    <place>
      <placeTerm type="text">Washington, D.C. :</placeTerm>
    </place>
    <agent>
      <namePart>Library of Congress Web Archiving Program,</namePart>
    </agent>
    <dateOther type="production">[2018]</dateOther>
  </originInfo>
  <language>
    <languageTerm authority="iso639-2b" type="code">eng</languageTerm>
  </language>
  <physicalDescription>
    <form authority="marcform">electronic</form>
    <form authority="marccategory">electronic resource</form>
    <form authority="marcsmd">remote</form>
    <form authority="rdamedia" type="media">computer</form>
    <form authority="rdacarrier" type="carrier">other</form>
    <extent>Online resource (datasets)</extent>
  </physicalDescription>
  <tableOfContents type="Contents">[Comma-separated values (CSV) dataset]. -- [Tab-separated values (TSV) dataset]. -- [Excel (XLS) dataset].</tableOfContents>
  <note>"Each of these datasets consist of 1,000 files generated from indexes of the Web archives, which were used to derive a random list of 1,000 items identified as CSV, tab-separated (TSV), or Excel (XLS) files and hosted on .gov domains. Each set includes 1,000 unique CSV, TSV, and XLS files and minimal metadata about them, including links to their locations within the Library's web archive."-- Web archive datasets website.</note>
  <note>"Dataset originally created 11/6/2018."-- README file</note>
  <note>"This dataset is based on exploratory work begun by the Library of Congress's Web Archiving Team in 2018. The goal of the work is to explore the contents of the Library's web archives through analysis of the indexes containing metadata from the harvested web content, as stored in CDX files. The metadata contained in the indexes was used for initial analysis, rather than the archived content stored in WARC and ARC container files, since W/ARC files present significant challenges due to large size and high processing requirements. The CDX indexes used in this initial analysis were six terabytes (TB) in size, which is a fraction of the web archive content in W/ARC files constituting nearly 1.5 petabytes (PB) at the time of analysis (November 2018)."-- README file</note>
  <note>Title from Web Archive Datasets website, viewed February 16, 2021.</note>
  <subject>
    <geographicCode authority="marcgac">n-us---</geographicCode>
  </subject>
  <subject authority="lcsh">
    <topic>Electronic government information</topic>
    <geographic>United States</geographic>
  </subject>
  <subject authority="lcsh">
    <topic>Electronic spreadsheets</topic>
    <geographic>United States</geographic>
  </subject>
  <subject authority="lcsh">
    <topic>Web archives</topic>
    <geographic>United States</geographic>
  </subject>
  <classification authority="lcc">JF1525.A8</classification>
  <location>
    <physicalLocation>s-Online</physicalLocation>
    <shelfLocator>Electronic Resource</shelfLocator>
  </location>
  <location>
    <url displayLabel="Web Archives Datasets website">https://labs.loc.gov/work/experiments/webarchive-datasets/</url>
  </location>
  <location>
    <url displayLabel="dataset" usage="primary display">https://hdl.loc.gov/loc.gdc/gdcdatasets.2020445557</url>
  </location>
  <identifier type="lccn">2020445557</identifier>
  <relatedItem>
    <location>
      <url displayLabel="Web Archives Datasets website">https://labs.loc.gov/work/experiments/webarchive-datasets/</url>
    </location>
  </relatedItem>
  <recordInfo>
    <descriptionStandard>rda</descriptionStandard>
    <recordContentSource authority="marcorg">DLC</recordContentSource>
    <recordCreationDate encoding="marc">210216</recordCreationDate>
    <recordChangeDate encoding="iso8601">20250607104600.2</recordChangeDate>
    <recordIdentifier>21906383</recordIdentifier>
    <recordOrigin>Converted from MARCXML to MODS version 3.8 using MARC21slim2MODS3-8_XSLT1-0.xsl (Revision 1.172 20230208)</recordOrigin>
    <languageOfCataloging>
      <languageTerm authority="iso639-2b" type="code">eng</languageTerm>
    </languageOfCataloging>
  </recordInfo>
</mods>
