{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "code", "source": [ "# Install dependencies\n", "from bs4 import BeautifulSoup\n", "from google.colab import files\n", "import re\n", "!pip install wikipedia\n", "import wikipedia\n", "import pandas as pd\n", "import io\n" ], "metadata": { "id": "8-8bQhROcP_4", "colab": { "base_uri": "https://localhost:8080/", "height": 303 }, "outputId": "65041961-6936-4ea8-a6cd-613592eec8fb" }, "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "" ], "text/html": [ "\n", " \n", " " ] }, "metadata": {} }, { "output_type": "stream", "name": "stdout", "text": [ "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", "Collecting wikipedia\n", " Downloading wikipedia-1.4.0.tar.gz (27 kB)\n", "Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.7/dist-packages (from wikipedia) (4.6.3)\n", "Requirement already satisfied: requests<3.0.0,>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from wikipedia) (2.23.0)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.0.0->wikipedia) (2022.9.24)\n", "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.0.0->wikipedia) (3.0.4)\n", "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.0.0->wikipedia) (1.24.3)\n", "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.0.0->wikipedia) (2.10)\n", "Building wheels for collected packages: wikipedia\n", " Building wheel for wikipedia (setup.py) ... \u001b[?25l\u001b[?25hdone\n", " Created wheel for wikipedia: filename=wikipedia-1.4.0-py3-none-any.whl size=11695 sha256=17e7e5052c68824b62b63b72f2c73d63b4e7021f4d07b97c6b4b8a45e33d086f\n", " Stored in directory: /root/.cache/pip/wheels/15/93/6d/5b2c68b8a64c7a7a04947b4ed6d89fb557dcc6bc27d1d7f3ba\n", "Successfully built wikipedia\n", "Installing collected packages: wikipedia\n", "Successfully installed wikipedia-1.4.0\n" ] } ] }, { "cell_type": "code", "source": [ "# Set CSS of display to wrap text\n", "from IPython.display import HTML, display, Image\n", "\n", "def set_css():\n", " display(HTML('''\n", " \n", " '''))\n", "get_ipython().events.register('pre_run_cell', set_css)" ], "metadata": { "id": "8OYRKVpZSw6S" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "# Get the first N sentences of a wikipedia article from a given search term\n", "# Replace special characters with spaces\n", "def getSummary(search_term, num_sentences):\n", " suggested = wikipedia.suggest(search_term)\n", " try:\n", " original = wikipedia.summary(search_term, sentences = num_sentences, auto_suggest=False)\n", " summary = original.replace(\" (listen)\", \" \")\n", " summary = re.sub(\"==\\s[a-zA-Z0-9_ ]*\\s==\", \" \", summary)\n", " #summary = re.sub(\"\\n\", \" \", summary)\n", " summary = re.sub(\"\\s{2,}\", \" \", summary)\n", " return summary\n", " except:\n", " \"Summary not found\"" ], "metadata": { "id": "qJZTNTfJVj1E", "colab": { "base_uri": "https://localhost:8080/", "height": 17 }, "outputId": "e254b475-4707-48b4-d52d-19a6ffe0c623" }, "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "" ], "text/html": [ "\n", " \n", " " ] }, "metadata": {} } ] }, { "cell_type": "code", "source": [ "# Upload a CSV file with the list of features to query\n", "# At minimum, the file needs the feature_id and the search term\n", "uploaded = files.upload()" ], "metadata": { "id": "Aqgwe1mMhjGa", "colab": { "base_uri": "https://localhost:8080/", "height": 74 }, "outputId": "1d0fabe9-98e0-45b2-9e1e-7bca40869ee8" }, "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "" ], "text/html": [ "\n", " \n", " " ] }, "metadata": {} }, { "output_type": "display_data", "data": { "text/plain": [ "" ], "text/html": [ "\n", " \n", " \n", " Upload widget is only available when the cell has been executed in the\n", " current browser session. Please rerun this cell to enable.\n", " \n", " " ] }, "metadata": {} }, { "output_type": "stream", "name": "stdout", "text": [ "Saving venus.csv to venus.csv\n" ] } ] }, { "cell_type": "code", "source": [ "df = pd.read_csv(io.BytesIO(uploaded['venus.csv']))\n", "df.head()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 386 }, "id": "VuZRW3iCPQzs", "outputId": "adc38073-fc72-4c72-a1d7-75d6f12299a8" }, "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "" ], "text/html": [ "\n", " \n", " " ] }, "metadata": {} }, { "output_type": "execute_result", "data": { "text/plain": [ " feature_id year feature feature_plural type origin \\\n", "0 19 1994 Crater Craters Famous Women England \n", "1 48 1994 Crater Craters Famous Women Austria \n", "2 50 1994 Crater Craters Famous Women United States \n", "3 52 1991 Crater Craters Famous Women Turkey \n", "4 70 1994 Crater Craters Famous Women England \n", "\n", " continent original_name clean_name search_term ... \\\n", "0 Africa Abington Abington Frances Abington ... \n", "1 Europe Adamson Adamson Joy Adamson ... \n", "2 North America Addams Addams Jane Addams ... \n", "3 Asia Adivar Adivar Halide Edib Adıvar ... \n", "4 Africa Aethelflaed Aethelflaed Æthelflæd ... \n", "\n", " under_400km under_20km last_updated diameter center_lat center_long \\\n", "0 True False 10/1/2006 21.7 -47.8 277.7 \n", "1 True False 10/1/2006 27.2 -14.8 29.6 \n", "2 True False 10/1/2006 87.0 -56.2 98.9 \n", "3 True False 10/1/2006 30.3 8.9 76.2 \n", "4 True False 10/1/2006 20.0 -18.2 196.6 \n", "\n", " northern_lat southern_lat eastern_long western_long \n", "0 -47.8 -47.8 277.7 277.7 \n", "1 -14.8 -14.8 29.6 29.6 \n", "2 -56.2 -56.2 98.9 98.9 \n", "3 8.9 8.9 76.2 76.2 \n", "4 -18.2 -18.2 196.6 196.6 \n", "\n", "[5 rows x 22 columns]" ], "text/html": [ "\n", "
\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
feature_idyearfeaturefeature_pluraltypeorigincontinentoriginal_nameclean_namesearch_term...under_400kmunder_20kmlast_updateddiametercenter_latcenter_longnorthern_latsouthern_lateastern_longwestern_long
0191994CraterCratersFamous WomenEnglandAfricaAbingtonAbingtonFrances Abington...TrueFalse10/1/200621.7-47.8277.7-47.8-47.8277.7277.7
1481994CraterCratersFamous WomenAustriaEuropeAdamsonAdamsonJoy Adamson...TrueFalse10/1/200627.2-14.829.6-14.8-14.829.629.6
2501994CraterCratersFamous WomenUnited StatesNorth AmericaAddamsAddamsJane Addams...TrueFalse10/1/200687.0-56.298.9-56.2-56.298.998.9
3521991CraterCratersFamous WomenTurkeyAsiaAdivarAdivarHalide Edib Adıvar...TrueFalse10/1/200630.38.976.28.98.976.276.2
4701994CraterCratersFamous WomenEnglandAfricaAethelflaedAethelflaedÆthelflæd...TrueFalse10/1/200620.0-18.2196.6-18.2-18.2196.6196.6
\n", "

5 rows × 22 columns

\n", "
\n", " \n", " \n", " \n", "\n", " \n", "
\n", "
\n", " " ] }, "metadata": {}, "execution_count": 9 } ] }, { "cell_type": "code", "source": [ "# Make the search df by pulling out two specific columns: the feature ID \n", "# and the search term (which I manually added to the original data source)\n", "names = df[df[\"search_term\"].notnull()][[\"feature_id\", \"search_term\"]]\n", "names" ], "metadata": { "id": "1cSK-6TXQn77", "colab": { "base_uri": "https://localhost:8080/", "height": 424 }, "outputId": "6f2876a4-47da-4b3b-bb6b-fb5df39e1865" }, "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "" ], "text/html": [ "\n", " \n", " " ] }, "metadata": {} }, { "output_type": "execute_result", "data": { "text/plain": [ " feature_id search_term\n", "0 19 Frances Abington\n", "1 48 Joy Adamson\n", "2 50 Jane Addams\n", "3 52 Halide Edib Adıvar\n", "4 70 Æthelflæd\n", "... ... ...\n", "1412 3619 Mamitu\n", "1436 3834 Manat (goddess)\n", "1535 4298 Ninhursag\n", "1541 4326 Nokomis\n", "1895 433 Aspasia\n", "\n", "[444 rows x 2 columns]" ], "text/html": [ "\n", "
\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
feature_idsearch_term
019Frances Abington
148Joy Adamson
250Jane Addams
352Halide Edib Adıvar
470Æthelflæd
.........
14123619Mamitu
14363834Manat (goddess)
15354298Ninhursag
15414326Nokomis
1895433Aspasia
\n", "

444 rows × 2 columns

\n", "
\n", " \n", " \n", " \n", "\n", " \n", "
\n", "
\n", " " ] }, "metadata": {}, "execution_count": 10 } ] }, { "cell_type": "code", "source": [ "# Initialize an empty Pandas dataframe\n", "output = pd.DataFrame()" ], "metadata": { "id": "LTcLs47yWF8k", "colab": { "base_uri": "https://localhost:8080/", "height": 17 }, "outputId": "2cfd5b26-c341-4799-f6d7-c3d4798c2a99" }, "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "" ], "text/html": [ "\n", " \n", " " ] }, "metadata": {} } ] }, { "cell_type": "code", "source": [ "# Loop through the search data frame and run the Wikipedia queries\n", "ids_col = []\n", "search_terms_col = []\n", "summary_col = []\n", "max_number = len(names)\n", "\n", "for i in range(max_number):\n", " id = names.iloc[i].feature_id\n", " name = names.iloc[i].search_term\n", " print(i + 1, \"/\", max_number, \": \", name)\n", " summary = getSummary(name, 3)\n", " ids_col.append(id)\n", " search_terms_col.append(name)\n", " summary_col.append(summary)" ], "metadata": { "id": "NX73WV3kRE7w", "colab": { "base_uri": "https://localhost:8080/", "height": 1000 }, "outputId": "270df73f-e055-4b03-ca52-c2861b30a5f3" }, "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "" ], "text/html": [ "\n", " \n", " " ] }, "metadata": {} }, { "output_type": "stream", "name": "stdout", "text": [ "1 / 444 : Frances Abington\n", "2 / 444 : Joy Adamson\n", "3 / 444 : Jane Addams\n", "4 / 444 : Halide Edib Adıvar\n", "5 / 444 : Æthelflæd\n", "6 / 444 : Aglaonice\n", "7 / 444 : Maria Gaetana Agnesi\n", "8 / 444 : Agrippina the Elder\n", "9 / 444 : Delia Akeley\n", "10 / 444 : Anna Akhmatova\n", "11 / 444 : Yosano Akiko\n", "12 / 444 : Zinaida Aksentyeva\n", "13 / 444 : Aisha Taymur\n", "14 / 444 : Louisa M. Alcott\n", "15 / 444 : Amalasuintha\n", "16 / 444 : Carmen Amaya\n", "17 / 444 : Amenirdis I\n", "18 / 444 : Anaxandra\n", "19 / 444 : Azar Andami\n", "20 / 444 : Elena Andreianova\n", "21 / 444 : Annia Faustina\n", "22 / 444 : Mary Anning\n", "23 / 444 : Susan B. Anthony\n", "24 / 444 : Virginia Apgar\n", "25 / 444 : Aurelia (mother of Caesar)\n", "26 / 444 : Jane Austen\n", "27 / 444 : Avvaiyar\n", "28 / 444 : Hertha M. Ayrton\n", "29 / 444 : Tekla Bądarzewska-Baranowska\n", "30 / 444 : Josephine Baker\n", "31 / 444 : Emily Greene Balch\n", "32 / 444 : Ban Zhao\n", "33 / 444 : Baranamtarra\n", "34 / 444 : Pancho Barnes\n", "35 / 444 : Oliva Sabuco\n", "36 / 444 : Ethel Barrymore\n", "37 / 444 : Valeria Barsova\n", "38 / 444 : Agniya Barto\n", "39 / 444 : Clara Barton\n", "40 / 444 : Florence Bascom\n", "41 / 444 : Marie Bashkirtseff\n", "42 / 444 : Laura Bassi\n", "43 / 444 : Bathsheba\n", "44 / 444 : Jean Batten\n", "45 / 444 : Catherine Beecher\n", "46 / 444 : Aphra Behn\n", "47 / 444 : User:Hjbender1/Sandbox\n", "48 / 444 : Olga Bergholz\n", "49 / 444 : Sarah Bernhardt\n", "50 / 444 : Sophia Tolstaya\n", "51 / 444 : Mary McLeod Bethune\n", "52 / 444 : Mary Ann Bickerdyke\n", "53 / 444 : Anna Blackburne\n", "54 / 444 : Karen Blixen\n", "55 / 444 : Nellie Bly\n", "56 / 444 : Boudica\n", "57 / 444 : Marie Boivin\n", "58 / 444 : Anne Boleyn\n", "59 / 444 : Kristine Bonnevie\n", "60 / 444 : Zitkala-Sa\n", "61 / 444 : Nadia Boulanger\n", "62 / 444 : Margaret Bourke-White\n", "63 / 444 : Louise Arner Boyd\n", "64 / 444 : Karen Boye\n", "65 / 444 : Anne Bradstreet\n", "66 / 444 : Frederika Bremer\n", "67 / 444 : Frances Brooke\n", "68 / 444 : Elizabeth Barrett Browning\n", "69 / 444 : Lucy Meredith Bryce\n", "70 / 444 : Pearl S. Buck\n", "71 / 444 : Adriana Budevska\n", "72 / 444 : Yevgenia Bugoslavskaya\n", "73 / 444 : Francesca Caccini\n", "74 / 444 : Cai Yan\n", "75 / 444 : Taylor Caldwell\n", "76 / 444 : Maria Callas\n", "77 / 444 : Kora of Sicyon\n", "78 / 444 : Emily Carr\n", "79 / 444 : Teresa Carreño\n", "80 / 444 : Rachel Carson\n", "81 / 444 : Maybelle Carter\n", "82 / 444 : Mary Cassatt\n", "83 / 444 : Rosalía de Castro\n", "84 / 444 : Willa Cather\n", "85 / 444 : Edith Cavell\n", "86 / 444 : Susannah Centlivre\n", "87 / 444 : Dickey Chapelle\n", "88 / 444 : Fukuda Chiyo-ni\n", "89 / 444 : Agatha Christie\n", "90 / 444 : Cleopatra\n", "91 / 444 : Patsy Cline\n", "92 / 444 : Jacqueline Cochran\n", "93 / 444 : Carola Cohn\n", "94 / 444 : Colette\n", "95 / 444 : Anna Komnene\n", "96 / 444 : Anne Finch Conway\n", "97 / 444 : Gerty Cori\n", "98 / 444 : Corinna\n", "99 / 444 : Elisabeth Hevelius\n", "100 / 444 : Isabella Cortese\n", "101 / 444 : Eugénie Cotton\n", "102 / 444 : Maria Cunitz\n", "103 / 444 : Isabella d'Este\n", "104 / 444 : Maria Danilova\n", "105 / 444 : Hariclea Darclée\n", "106 / 444 : Yekaterina Vorontsova-Dashkova\n", "107 / 444 : Dat So La Lee\n", "108 / 444 : Emily Davies\n", "109 / 444 : Josefa de Óbidos\n", "110 / 444 : Martine Bertereau\n", "111 / 444 : Simone de Beauvoir\n", "112 / 444 : Marie-Jeanne de Lalande\n", "113 / 444 : Germaine de Staël\n", "114 / 444 : Lydia DeWitt\n", "115 / 444 : Agatha Deken\n", "116 / 444 : Grazia Deledda\n", "117 / 444 : Ella Cara Deloria\n", "118 / 444 : Emmy Destinn\n", "119 / 444 : Emily Dickinson\n", "120 / 444 : Marlene Dietrich\n", "121 / 444 : Dorothea Dix\n", "122 / 444 : Émilie du Châtelet\n", "123 / 444 : Isadora Duncan\n", "124 / 444 : Ariel Durant\n", "125 / 444 : Eleonora Duse\n", "126 / 444 : Helene Dutrieu\n", "127 / 444 : Amelia Earhart\n", "128 / 444 : Maria Edgeworth\n", "129 / 444 : Tilly Edinger\n", "130 / 444 : Nina Yefimova\n", "131 / 444 : George Eliot\n", "132 / 444 : Erinna\n", "133 / 444 : Mariya Ermolova\n", "134 / 444 : Dorothea Erxleben\n", "135 / 444 : Josefa Llanes Escoda\n", "136 / 444 : Aelia Eudocia\n", "137 / 444 : Olga Fedchenko\n", "138 / 444 : Edna Ferber\n", "139 / 444 : La Caramba\n", "140 / 444 : Kathleen Ferrier\n", "141 / 444 : Kirsten Flagstad\n", "142 / 444 : Dian Fossey\n", "143 / 444 : Marie Fouquet\n", "144 / 444 : Anne Frank\n", "145 / 444 : Fredegund\n", "146 / 444 : Beatriz Galindo\n", "147 / 444 : Greta Gustafsson Garbo\n", "148 / 444 : July Garland\n", "149 / 444 : Judith Gautier\n", "150 / 444 : Vera Fedorovna Gaze\n", "151 / 444 : Artemisia Gentileschi\n", "152 / 444 : Sophie Germain\n", "153 / 444 : Alessandra Giliani\n", "154 / 444 : Mary Gilmore\n", "155 / 444 : Susan Glaspell\n", "156 / 444 : Lady Godiva\n", "157 / 444 : Maria Goeppert Mayer\n", "158 / 444 : Anna Golubkina\n", "159 / 444 : Natalia Goncharova\n", "160 / 444 : Martha Graham\n", "161 / 444 : Kate Greenaway\n", "162 / 444 : Lady Gregory\n", "163 / 444 : Lady Jane Grey\n", "164 / 444 : Sarah Moore Grimké\n", "165 / 444 : Valentina Grizodubova\n", "166 / 444 : Guan Daosheng\n", "167 / 444 : Yvette Guilbert\n", "168 / 444 : Nell Gwyn\n", "169 / 444 : Wilma Neruda\n", "170 / 444 : Lorraine Hansberry\n", "171 / 444 : Hatshepsut\n", "172 / 444 : Fumiko Hayashi (author)\n", "173 / 444 : Lillian Hellman\n", "174 / 444 : Héloïse\n", "175 / 444 : Sonja Henie\n", "176 / 444 : Barbara Hepworth\n", "177 / 444 : Leizu\n", "178 / 444 : Marguerite Higgins\n", "179 / 444 : Himiko\n", "180 / 444 : Billie Holiday\n", "181 / 444 : Mary Horner Lyell\n", "182 / 444 : Julia Ward Howe\n", "183 / 444 : Hroswitha\n", "184 / 444 : Hsueh T'ao\n", "185 / 444 : Hua Mulan\n", "186 / 444 : Huang Daopo\n", "187 / 444 : Peggy Hull\n", "188 / 444 : Zora Neale Hurston\n", "189 / 444 : Hwang Jini\n", "190 / 444 : Fusaye Ichikawa\n", "191 / 444 : Isabella I of Castile\n", "192 / 444 : Murasaki Shikibu\n", "193 / 444 : Mari Jászai\n", "194 / 444 : Sophia Jex-Blake\n", "195 / 444 : Jerusha Jhirad\n", "196 / 444 : Amy Johnson\n", "197 / 444 : Irène Joliot-Curie\n", "198 / 444 : Anandibai Joshee\n", "199 / 444 : Maria Jotuni\n", "200 / 444 : Frida Kahlo\n", "201 / 444 : Kaikilani\n", "202 / 444 : Kartini\n", "203 / 444 : Angelica Kauffman\n", "204 / 444 : Keleanohoanaapiapi\n", "205 / 444 : Helen Keller\n", "206 / 444 : Fanny Kemble\n", "207 / 444 : Elizabeth Kenny\n", "208 / 444 : Mihri Khatun\n", "209 / 444 : Mary Kingsley\n", "210 / 444 : Katherina Klafsky\n", "211 / 444 : Mariya Klenova\n", "212 / 444 : Lydia Koidula\n", "213 / 444 : Käthe Kollwitz\n", "214 / 444 : Marie Konopnicka\n", "215 / 444 : Helena Kottauer\n", "216 / 444 : Madame de La Fayette\n", "217 / 444 : Marie-Louise Lachapelle\n", "218 / 444 : Selma Lagerlöf\n", "219 / 444 : Wanda Landowska\n", "220 / 444 : Lillie Langtry\n", "221 / 444 : Marie Laurencin\n", "222 / 444 : Emma Lazarus\n", "223 / 444 : Sarra Lebedeva\n", "224 / 444 : Jeanne-Philiberte Ledoux\n", "225 / 444 : Inge Lehmann\n", "226 / 444 : Wrexie Leonard\n", "227 / 444 : Judith Leyster\n", "228 / 444 : Li Qingzhao\n", "229 / 444 : Leona Woods Marshall Libby\n", "230 / 444 : Jenny Lind\n", "231 / 444 : Astrid Lindgren\n", "232 / 444 : Belva Lockwood\n", "233 / 444 : Kathleen Lonsdale\n", "234 / 444 : Marie-Aimée Lullin\n", "235 / 444 : Mary Lyon\n", "236 / 444 : Ma Shouzhen\n", "237 / 444 : Flora MacDonald\n", "238 / 444 : Anna Magnani\n", "239 / 444 : Maria Malibran\n", "240 / 444 : La Malinche\n", "241 / 444 : Margaret Eliza Maltby\n", "242 / 444 : Katherine Mansfield\n", "243 / 444 : Sidnie Manton\n", "244 / 444 : Anna Manzolini\n", "245 / 444 : Maria Celeste\n", "246 / 444 : Beryl Markham\n", "247 / 444 : Ngaio Marsh\n", "248 / 444 : Maria Martinez\n", "249 / 444 : Hōjō Masako\n", "250 / 444 : Margaret Mead\n", "251 / 444 : Pandita Ramabai\n", "252 / 444 : Ganjevi Mehseti\n", "253 / 444 : Lise Meitner\n", "254 / 444 : Nellie Melba\n", "255 / 444 : Maria Sibylla Merian\n", "256 / 444 : Merit-Ptah\n", "257 / 444 : Dasha from Sevastopol\n", "258 / 444 : Edna St. Vincent Millay\n", "259 / 444 : Sibylle Riqueti de Mirabeau\n", "260 / 444 : Mona Lisa\n", "261 / 444 : Maria Montessori\n", "262 / 444 : Lola Montez\n", "263 / 444 : Marianne Moore\n", "264 / 444 : Berthe Morisot\n", "265 / 444 : Anna Mary Robertson Moses\n", "266 / 444 : Anna Cora Mowatt\n", "267 / 444 : Mu Guiying\n", "268 / 444 : Vera Mukhina\n", "269 / 444 : Mumtaz Mahal\n", "270 / 444 : Gabriele Münter\n", "271 / 444 : Nodira\n", "272 / 444 : Zofia Nałkowska\n", "273 / 444 : Božena Němcová\n", "274 / 444 : Louise Nevelson\n", "275 / 444 : Florence Nightingale\n", "276 / 444 : Bronislava Nijinska\n", "277 / 444 : Christine Nilsson\n", "278 / 444 : Anaïs Nin\n", "279 / 444 : Nofret\n", "280 / 444 : Hedwig Nordenflycht\n", "281 / 444 : Flannery O'Connor\n", "282 / 444 : Georgia O'Keeffe\n", "283 / 444 : Annie Oakley\n", "284 / 444 : Nadezhda Obukhova\n", "285 / 444 : Zofia Oleśnicka\n", "286 / 444 : Baroness Orczy\n", "287 / 444 : Lyubov Orlova\n", "288 / 444 : Polina Osipenko\n", "289 / 444 : Varya Panina\n", "290 / 444 : Violeta Parra\n", "291 / 444 : Adelina Patti\n", "292 / 444 : Anna Pavlova\n", "293 / 444 : Cecilia Payne-Gaposchkin\n", "294 / 444 : Olena Pchilka\n", "295 / 444 : Annie Peck\n", "296 / 444 : Tonita Peña\n", "297 / 444 : Phryne\n", "298 / 444 : Édith Piaf\n", "299 / 444 : Elena Cornaro Piscopia\n", "300 / 444 : Pocahontas\n", "301 / 444 : Yelena Polenova\n", "302 / 444 : Rosa Ponselle\n", "303 / 444 : Aleksandra Potanina\n", "304 / 444 : Beatrix Potter\n", "305 / 444 : Catharina Prichard\n", "306 / 444 : Harriet Quimby\n", "307 / 444 : Ayn Rand\n", "308 / 444 : Marina M. Raskova\n", "309 / 444 : Razia Sultana\n", "310 / 444 : Juliette Récamier\n", "311 / 444 : Jean Rhys\n", "312 / 444 : Ellen Swallow Richards\n", "313 / 444 : Margaretta Riley\n", "314 / 444 : Gorislava Rogneda\n", "315 / 444 : Sofia Romanskaya\n", "316 / 444 : Rosa Bonheur\n", "317 / 444 : Christina Rossetti\n", "318 / 444 : Varvara Rudneva\n", "319 / 444 : Lidia Ruslanova\n", "320 / 444 : Florence R. Sabin\n", "321 / 444 : Sacagawea\n", "322 / 444 : Nelly Sachs\n", "323 / 444 : Shin Saimdang\n", "324 / 444 : George Sand\n", "325 / 444 : Cora Sandel\n", "326 / 444 : Margaret Sanger\n", "327 / 444 : Sappho\n", "328 / 444 : Dewi Sartika\n", "329 / 444 : Saskia van Uylenburgh\n", "330 / 444 : Dorothy L. Sayers\n", "331 / 444 : Caterina Scarpellini\n", "332 / 444 : Ernestine Schumann-Heink\n", "333 / 444 : Valentina Serova\n", "334 / 444 : Marie de Rabutin-Chantal, marquise de Sévigné\n", "335 / 444 : Jane Seymour\n", "336 / 444 : Natalia Shelikhova\n", "337 / 444 : Mary Stone (doctor)\n", "338 / 444 : Klavdiya Shulzhenko\n", "339 / 444 : Sarah Siddons\n", "340 / 444 : Mary Sidney\n", "341 / 444 : Alla Nikolaevna Simonenko\n", "342 / 444 : Elisabetta Sirani\n", "343 / 444 : Edith Sitwell\n", "344 / 444 : Elizabeth Cady Stanton\n", "345 / 444 : Gertrude Stein\n", "346 / 444 : Sabina von Steinbach\n", "347 / 444 : Marie Stopes\n", "348 / 444 : Alfonsina Storni\n", "349 / 444 : Harriet Beecher Stowe\n", "350 / 444 : Mary, Queen of Scots\n", "351 / 444 : Anne Sullivan\n", "352 / 444 : Maria Taglioni\n", "353 / 444 : Ida Tarbell\n", "354 / 444 : Helen Taussig\n", "355 / 444 : Sara Teasdale\n", "356 / 444 : Princess Maria Tenisheva\n", "357 / 444 : Josephine Tey\n", "358 / 444 : M. Carey Thomas\n", "359 / 444 : Tipporah\n", "360 / 444 : Alice Toklas\n", "361 / 444 : Frances Milton Trollope\n", "362 / 444 : Sojourner Truth\n", "363 / 444 : Lidiya Tseraskaya\n", "364 / 444 : Marina Tsvetayeva\n", "365 / 444 : Harriet Tubman\n", "366 / 444 : Marie Tussaud\n", "367 / 444 : Nadezhda Udaltsova\n", "368 / 444 : Sigrid Undset\n", "369 / 444 : Jahonotin Uvaysiy\n", "370 / 444 : Elena Văcărescu\n", "371 / 444 : Suzanne Valadon\n", "372 / 444 : Pauline Viardot\n", "373 / 444 : Vibert Douglas\n", "374 / 444 : Élisabeth Vigée Le Brun\n", "375 / 444 : Jeannette Villepreux-Power\n", "376 / 444 : Anna Volkova\n", "377 / 444 : Maria Theresia von Paradis\n", "378 / 444 : Anna Maria van Schurman\n", "379 / 444 : Regina Von Siebold\n", "380 / 444 : Bertha von Suttner\n", "381 / 444 : Marko Vovchok\n", "382 / 444 : Ethel Voynich\n", "383 / 444 : Wang Zhenyi (astronomer)\n", "384 / 444 : Mercy Otis Warren\n", "385 / 444 : Simone Weil\n", "386 / 444 : Wen Shu\n", "387 / 444 : Rebecca West\n", "388 / 444 : Edith Wharton\n", "389 / 444 : Phillis Wheatley\n", "390 / 444 : Sarah Whiting\n", "391 / 444 : Mary Watson Whitney\n", "392 / 444 : Clara Wieck\n", "393 / 444 : Jane Wilde\n", "394 / 444 : Laura Ingalls Wilder\n", "395 / 444 : Emma Willard\n", "396 / 444 : Toby Riddle\n", "397 / 444 : Sarah Winnemucca\n", "398 / 444 : Wilhelmine Witte\n", "399 / 444 : Mary Wollstonecraft\n", "400 / 444 : Virginia Woolf\n", "401 / 444 : Fanny Bullock Workman\n", "402 / 444 : Wu Zetian\n", "403 / 444 : Xantippe\n", "404 / 444 : Xiao Hong\n", "405 / 444 : Aleksandra Yablochkina\n", "406 / 444 : Caroline Yale\n", "407 / 444 : Charlotte Yonge\n", "408 / 444 : Yayoi Yoshioka\n", "409 / 444 : Adela Zamudio\n", "410 / 444 : Žemaitė\n", "411 / 444 : Zenobia\n", "412 / 444 : Maria Zhilova\n", "413 / 444 : Zhu Shuzhen\n", "414 / 444 : Lydia Zvereva\n", "415 / 444 : Erecura\n", "416 / 444 : Aglaurus, daughter of Cecrops\n", "417 / 444 : Ak Ana\n", "418 / 444 : Al-Uzza \n", "419 / 444 : Allani\n", "420 / 444 : Imentet\n", "421 / 444 : Anahit\n", "422 / 444 : Annapurna (goddess)\n", "423 / 444 : Spenta Armaiti\n", "424 / 444 : Artemis\n", "425 / 444 : Aruru (goddess)\n", "426 / 444 : Ashnan\n", "427 / 444 : Asintmah\n", "428 / 444 : Axomamma\n", "429 / 444 : Atahensic\n", "430 / 444 : Atargatis\n", "431 / 444 : Atete\n", "432 / 444 : Asdzą́ą́ Nádleehé\n", "433 / 444 : Atabey (goddess)\n", "434 / 444 : Auðumbla\n", "435 / 444 : Bachué\n", "436 / 444 : Banba\n", "437 / 444 : Disani\n", "438 / 444 : Dzunukwa\n", "439 / 444 : Komorkis\n", "440 / 444 : Mamitu\n", "441 / 444 : Manat (goddess)\n", "442 / 444 : Ninhursag\n", "443 / 444 : Nokomis\n", "444 / 444 : Aspasia\n" ] } ] }, { "cell_type": "code", "source": [ "# Update the empty \"output\" dataframe with the values from the loop\n", "print(len(ids_col))\n", "\n", "output = pd.DataFrame({\n", " 'feature_id': pd.Series(ids_col),\n", " 'search_term': pd.Series(search_terms_col),\n", " 'summary': pd.Series(summary_col),\n", " 'image_url': pd.Series(image_url_col),\n", " 'image_caption': pd.Series(image_caption_col)\n", "})\n", "\n", "output" ], "metadata": { "id": "RhH6T_sIWvxa", "colab": { "base_uri": "https://localhost:8080/", "height": 638 }, "outputId": "6f76b25c-7fb8-4398-fc25-30354face042" }, "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "" ], "text/html": [ "\n", " \n", " " ] }, "metadata": {} }, { "output_type": "stream", "name": "stdout", "text": [ "[19, 48, 50, 52, 70, 85, 87, 93, 126, 128, 132, 138, 152, 168, 212, 217, 224, 253, 258, 263, 284, 285, 292, 316, 472, 483, 512, 522, 547, 564, 572, 591, 602, 611, 614, 618, 619, 622, 623, 626, 627, 630, 636, 639, 657, 663, 688, 699, 706, 711, 722, 738, 763, 778, 782, 783, 799, 803, 815, 816, 841, 843, 849, 850, 856, 871, 891, 896, 902, 907, 908, 912, 954, 963, 971, 975, 977, 1028, 1030, 1033, 1036, 1043, 1052, 1077, 1085, 1101, 1142, 1189, 1201, 1240, 1244, 1260, 1264, 1267, 1277, 1290, 1309, 1311, 1317, 1318, 1321, 1348, 1698, 1401, 1412, 1422, 1423, 1427, 1434, 1435, 1436, 1441, 1446, 1450, 1464, 1469, 1475, 1505, 1528, 1532, 1551, 1649, 1661, 1671, 1674, 1677, 1704, 1721, 1722, 1726, 1766, 1831, 1836, 1838, 1843, 1862, 1926, 1937, 1941, 1944, 1971, 1998, 2002, 2013, 2017, 2078, 2103, 2108, 2121, 2125, 2141, 2147, 2162, 2166, 2183, 2202, 2203, 2216, 2218, 2235, 2245, 2247, 2249, 2253, 2256, 2265, 2271, 2299, 2329, 2352, 2385, 2391, 2436, 2441, 2449, 2461, 2498, 2499, 2509, 2542, 2555, 2564, 2566, 2567, 2569, 2571, 2578, 2589, 2597, 2644, 2723, 2778, 2811, 2825, 2827, 2836, 2840, 2845, 2847, 2886, 2887, 2940, 2963, 2981, 2983, 2985, 2989, 3006, 3038, 3060, 3063, 3072, 3081, 3086, 3100, 3187, 3198, 3232, 3266, 3274, 3302, 3309, 3321, 3327, 3332, 3351, 3374, 3381, 3385, 3405, 14146, 3442, 3467, 3513, 3536, 3545, 3553, 3582, 3606, 3609, 3611, 3637, 3641, 3647, 3705, 3719, 3721, 3727, 3735, 3787, 3792, 3805, 3808, 3816, 3853, 3856, 3890, 3898, 3919, 3960, 4020, 4023, 4027, 4037, 4049, 4056, 4059, 4063, 4067, 4069, 4105, 4120, 4381, 4227, 4262, 4264, 4280, 4285, 4325, 4331, 4539, 4541, 4382, 4391, 4437, 4485, 4494, 4507, 4579, 4593, 4612, 4617, 4623, 4625, 4629, 4679, 4713, 4716, 4739, 4769, 4779, 4796, 4810, 4812, 4821, 4902, 4944, 4951, 4970, 4973, 5019, 5026, 5034, 5170, 5174, 5187, 5197, 5217, 5234, 5257, 5264, 5265, 5293, 5300, 5301, 5307, 5319, 5330, 5334, 5349, 5360, 5386, 5437, 5800, 5447, 5482, 5488, 5507, 5518, 5519, 5540, 5574, 5587, 5682, 5691, 5692, 5718, 5719, 5720, 5729, 5756, 5820, 5869, 5883, 5892, 5920, 5943, 5976, 6024, 6046, 6090, 6099, 6103, 6114, 6122, 6153, 6185, 6224, 6264, 6465, 6276, 6376, 6377, 6385, 6388, 6429, 6443, 6444, 6445, 6446, 6452, 6453, 6487, 6492, 6511, 6517, 6526, 6530, 6532, 6535, 6537, 6542, 6545, 6546, 6550, 6558, 6562, 14174, 6571, 6574, 6578, 6585, 6601, 6610, 6622, 6630, 6668, 6674, 6699, 6943, 6718, 6731, 6734, 6769, 67, 90, 125, 153, 192, 225, 248, 282, 340, 401, 409, 424, 428, 431, 444, 449, 452, 462, 464, 467, 540, 593, 1546, 1648, 3076, 3619, 3834, 4298, 4326, 433] 444\n" ] }, { "output_type": "execute_result", "data": { "text/plain": [ " feature_id search_term \\\n", "0 19 Frances Abington \n", "1 48 Joy Adamson \n", "2 50 Jane Addams \n", "3 52 Halide Edib Adıvar \n", "4 70 Æthelflæd \n", ".. ... ... \n", "439 3619 Mamitu \n", "440 3834 Manat (goddess) \n", "441 4298 Ninhursag \n", "442 4326 Nokomis \n", "443 433 Aspasia \n", "\n", " summary \\\n", "0 Frances \"Fanny\" Abington (1737 – 4 March 1815)... \n", "1 Friederike Victoria \"Joy\" Adamson (née Gessner... \n", "2 Laura Jane Addams (September 6, 1860 – May 21,... \n", "3 Halide Edib Adıvar (Ottoman Turkish: خالده ادي... \n", "4 Æthelflæd, Lady of the Mercians (c. 870 – 12 J... \n", ".. ... \n", "439 Mammitum, Mammitu or Mammi was a Mesopotamian ... \n", "440 Manāt (Arabic: مناة Arabic pronunciation: [maˈ... \n", "441 Ninḫursaĝ (Sumerian: 𒀭𒎏𒄯𒊕 Ninḫarsang; DNIN-ḪAR... \n", "442 Nokomis is the name of Nanabozho's grandmother... \n", "443 Aspasia (; Greek: Ἀσπασία Greek: [aspasíaː]; c... \n", "\n", " image_url \\\n", "0 https://upload.wikimedia.org/wikipedia/commons... \n", "1 https://upload.wikimedia.org/wikipedia/commons... \n", "2 https://upload.wikimedia.org/wikipedia/commons... \n", "3 No image URL \n", "4 https://upload.wikimedia.org/wikipedia/commons... \n", ".. ... \n", "439 No image URL \n", "440 https://upload.wikimedia.org/wikipedia/commons... \n", "441 https://upload.wikimedia.org/wikipedia/commons... \n", "442 No image URL \n", "443 No image URL \n", "\n", " image_caption \n", "0 Portrait by Joshua Reynolds \n", "1 Adamson with Elsa the lion c. 1958 \n", "2 Addams c. 1926 \n", "3 No image Caption \n", "4 Æthelflæd (from The Cartulary and Customs of A... \n", ".. ... \n", "439 No image Caption \n", "440 2nd century AD relief from Hatra depicting the... \n", "441 Akkadian cylinder seal impression depicting a ... \n", "442 No image Caption \n", "443 No image Caption \n", "\n", "[444 rows x 5 columns]" ], "text/html": [ "\n", "
\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
feature_idsearch_termsummaryimage_urlimage_caption
019Frances AbingtonFrances \"Fanny\" Abington (1737 – 4 March 1815)...https://upload.wikimedia.org/wikipedia/commons...Portrait by Joshua Reynolds
148Joy AdamsonFriederike Victoria \"Joy\" Adamson (née Gessner...https://upload.wikimedia.org/wikipedia/commons...Adamson with Elsa the lion c. 1958
250Jane AddamsLaura Jane Addams (September 6, 1860 – May 21,...https://upload.wikimedia.org/wikipedia/commons...Addams c. 1926
352Halide Edib AdıvarHalide Edib Adıvar (Ottoman Turkish: خالده ادي...No image URLNo image Caption
470ÆthelflædÆthelflæd, Lady of the Mercians (c. 870 – 12 J...https://upload.wikimedia.org/wikipedia/commons...Æthelflæd (from The Cartulary and Customs of A...
..................
4393619MamituMammitum, Mammitu or Mammi was a Mesopotamian ...No image URLNo image Caption
4403834Manat (goddess)Manāt (Arabic: مناة Arabic pronunciation: [maˈ...https://upload.wikimedia.org/wikipedia/commons...2nd century AD relief from Hatra depicting the...
4414298NinhursagNinḫursaĝ (Sumerian: 𒀭𒎏𒄯𒊕 Ninḫarsang; DNIN-ḪAR...https://upload.wikimedia.org/wikipedia/commons...Akkadian cylinder seal impression depicting a ...
4424326NokomisNokomis is the name of Nanabozho's grandmother...No image URLNo image Caption
443433AspasiaAspasia (; Greek: Ἀσπασία Greek: [aspasíaː]; c...No image URLNo image Caption
\n", "

444 rows × 5 columns

\n", "
\n", " \n", " \n", " \n", "\n", " \n", "
\n", "
\n", " " ] }, "metadata": {}, "execution_count": 57 } ] }, { "cell_type": "code", "source": [ "# Download the file to your computer\n", "output.to_csv('output.csv', encoding='utf-8-sig')\n", "files.download('output.csv')" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "id": "8GO6Vx-WThw3", "outputId": "c7712102-4621-4f91-88cf-36996087a259" }, "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "" ], "text/html": [ "\n", " \n", " " ] }, "metadata": {} }, { "output_type": "display_data", "data": { "text/plain": [ "" ], "application/javascript": [ "\n", " async function download(id, filename, size) {\n", " if (!google.colab.kernel.accessAllowed) {\n", " return;\n", " }\n", " const div = document.createElement('div');\n", " const label = document.createElement('label');\n", " label.textContent = `Downloading \"${filename}\": `;\n", " div.appendChild(label);\n", " const progress = document.createElement('progress');\n", " progress.max = size;\n", " div.appendChild(progress);\n", " document.body.appendChild(div);\n", "\n", " const buffers = [];\n", " let downloaded = 0;\n", "\n", " const channel = await google.colab.kernel.comms.open(id);\n", " // Send a message to notify the kernel that we're ready.\n", " channel.send({})\n", "\n", " for await (const message of channel.messages) {\n", " // Send a message to notify the kernel that we're ready.\n", " channel.send({})\n", " if (message.buffers) {\n", " for (const buffer of message.buffers) {\n", " buffers.push(buffer);\n", " downloaded += buffer.byteLength;\n", " progress.value = downloaded;\n", " }\n", " }\n", " }\n", " const blob = new Blob(buffers, {type: 'application/binary'});\n", " const a = document.createElement('a');\n", " a.href = window.URL.createObjectURL(blob);\n", " a.download = filename;\n", " div.appendChild(a);\n", " a.click();\n", " div.remove();\n", " }\n", " " ] }, "metadata": {} }, { "output_type": "display_data", "data": { "text/plain": [ "" ], "application/javascript": [ "download(\"download_794036be-d4de-4d0e-982d-bd59fc60f750\", \"output.csv\", 242563)" ] }, "metadata": {} } ] } ] }