====Linux====

<code>
wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.10.0-amd64.deb
sudo dpkg -i elasticsearch-7.10.0-amd64.deb
sudo apt --fix-broken install
</code>

<code>
wget -qO - https://artifacts.elastic.co/GPG-KEY-elasticsearch | sudo apt-key add -
</code>

<code>
# sudo nano /etc/elasticsearch/elasticsearch.yml   (example values below)
cluster.name: my-cluster
network.host: localhost
http.port: 9200
</code>

<code>
sudo systemctl start elasticsearch
sudo systemctl enable elasticsearch
curl -X GET "http://localhost:9200"
</code>

====Docker====

<code yaml>
version: '3'
services:
  elasticsearch:
    image: docker.elastic.co/elasticsearch/elasticsearch:8.10.2   # example version tag
    container_name: elasticsearch
    environment:
      - discovery.type=single-node
      - ES_JAVA_OPTS=-Xms512m -Xmx512m
      - ELASTIC_PASSWORD=dein_sicheres_passwort
    ports:
      - 9200:9200
      - 9300:9300
    volumes:
      - esdata:/usr/share/elasticsearch/data
    networks:
      - es-net

volumes:
  esdata:
    driver: local

networks:
  es-net:
    driver: bridge
</code>

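With the compose file saved (e.g. as docker-compose.yml), the container can be started in the background and its logs followed; this assumes a Docker installation with the compose plugin. Afterwards the reachability check below can be run.

<code>
docker compose up -d
docker compose logs -f elasticsearch
</code>
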
<code>
sudo curl -X GET "http://localhost:9200" -u elastic:dein_sicheres_passwort
</code>

=====Service=====
<code>
# example host, index and user names assumed (localhost:9200, testindex, testuser)
#add user
curl -X POST "http://localhost:9200/_security/user/testuser" -H 'Content-Type: application/json' -d '{"password":"changeme","roles":["superuser"]}'
#delete user
curl -X DELETE "http://localhost:9200/_security/user/testuser"
# create index
curl -X PUT "http://localhost:9200/testindex"
#list indices
curl -X GET "http://localhost:9200/_cat/indices?v"
#delete index
curl -X DELETE "http://localhost:9200/testindex"
#add entry
curl -X POST "http://localhost:9200/testindex/_doc" -H 'Content-Type: application/json' -d '{"name":"test"}'
#search entry
curl -X GET "http://localhost:9200/testindex/_search?q=name:test"
curl -X GET "http://localhost:9200/testindex/_search?q=*:*"
# As Text
curl -X GET "http://localhost:9200/testindex/_search?q=name:test&pretty"
# As JSON
curl -X GET "http://localhost:9200/testindex/_search" -H 'Content-Type: application/json' -d '{"query":{"match":{"name":"test"}}}'
# For LIKE the field must be a "text" field (wildcard query)
curl -X GET "http://localhost:9200/testindex/_search" -H 'Content-Type: application/json' -d '{"query":{"wildcard":{"name":"te*"}}}'
</code>

=====Python=====

<code python>
import logging
from urllib.parse import urljoin
import requests
from bs4 import BeautifulSoup

logging.basicConfig(
    format='%(asctime)s %(levelname)s:%(message)s',
    level=logging.INFO)

class Crawler:

    def __init__(self, urls=[]):
        # URLs already processed and URLs still queued for crawling
        self.visited_urls = []
        self.urls_to_visit = urls

    def download_url(self, url):
        return requests.get(url).text

    def get_linked_urls(self, url, html):
        # extract all links from the page and resolve relative paths
        soup = BeautifulSoup(html, 'html.parser')
        for link in soup.find_all('a'):
            path = link.get('href')
            if path and path.startswith('/'):
                path = urljoin(url, path)
            yield path

    def add_url_to_visit(self, url):
        if url not in self.visited_urls and url not in self.urls_to_visit:
            self.urls_to_visit.append(url)

    def crawl(self, url):
        html = self.download_url(url)
        for url in self.get_linked_urls(url, html):
            self.add_url_to_visit(url)

    def run(self):
        while self.urls_to_visit:
            url = self.urls_to_visit.pop(0)
            logging.info(f'Crawling: {url}')
            try:
                self.crawl(url)
            except Exception:
                logging.exception(f'Failed to crawl: {url}')
            finally:
                self.visited_urls.append(url)

if __name__ == '__main__':
    Crawler(urls=['https://www.example.com/']).run()  # example seed URL
</code>
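
The crawler above only collects URLs. A minimal sketch of how each downloaded page could be pushed into Elasticsearch is shown below; it extends the Crawler class defined above and assumes the client settings and the "test_index" mapping from the following sections (host, credentials and field names are examples).

<code python>
from elasticsearch import Elasticsearch
import urllib3

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# example connection settings, matching the sections below
es = Elasticsearch(
    ['https://localhost:9200'],
    basic_auth=("elastic", "dein_sicheres_passwort"),
    verify_certs=False
)

class IndexingCrawler(Crawler):
    # Crawler is the class defined in the block above;
    # every downloaded page is stored in Elasticsearch while crawling
    def crawl(self, url):
        html = self.download_url(url)
        es.index(index="test_index", document={"url": url, "content": html})
        for linked_url in self.get_linked_urls(url, html):
            self.add_url_to_visit(linked_url)

if __name__ == '__main__':
    IndexingCrawler(urls=['https://www.example.com/']).run()
</code>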

====Create Index====
<code python>
import os
from elasticsearch import Elasticsearch
import urllib3

# suppress certificate warnings for the self-signed cluster certificate
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

es = Elasticsearch(
    ['https://localhost:9200'],          # example host
    basic_auth=("elastic", "dein_sicheres_passwort"),
    verify_certs=False
)

index_name = "test_index"                # example index name

if not es.indices.exists(index=index_name):
    # example mapping (field names assumed)
    es.indices.create(index=index_name, body={
        "mappings": {
            "properties": {
                "url": {"type": "text"},
                "content": {"type": "text"}
            }
        }
    })
    print(f"Index '{index_name}' created.")
else:
    print(f"Index '{index_name}' already exists.")
</code>

====Insert Data====
<code python>
import os
from elasticsearch import Elasticsearch
from elasticsearch.helpers import streaming_bulk
import urllib3

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

es = Elasticsearch(
    ['https://localhost:9200'],          # example host
    basic_auth=("elastic", "dein_sicheres_passwort"),
    verify_certs=False
)

index_name = "test_index"                # example index name

# Test entries as documents (example field values)
documents = [
    {"url": "https://www.example.com/a", "content": "first test document"},
    {"url": "https://www.example.com/b", "content": "second test document"}
]

# Insert the documents into the index
for doc in documents:
    es.index(index=index_name, document=doc)
    print(f"Inserted: {doc}")

print("Done.")
</code>
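
The script above imports streaming_bulk but indexes each document individually; for larger batches the helper reduces the number of round trips. A minimal sketch, reusing the es client, index_name and documents from the block above:

<code python>
from elasticsearch.helpers import streaming_bulk

# wrap each document in a bulk action for the target index
actions = ({"_index": index_name, "_source": doc} for doc in documents)

# streaming_bulk yields an (ok, result) tuple per action
for ok, result in streaming_bulk(es, actions, chunk_size=500):
    if not ok:
        print("Failed:", result)
</code>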

====Query Data====
<code python>
from elasticsearch import Elasticsearch
import urllib3

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

es = Elasticsearch(
    ['https://localhost:9200'],          # example host
    basic_auth=("elastic", "dein_sicheres_passwort"),
    verify_certs=False
)

# example query: match on the "content" field (field name and search term assumed)
data = {
    "query": {
        "match": {
            "content": "test"
        }
    },
    "size": 100
}

response = es.search(index="test_index", body=data, scroll='2m')

if 'hits' not in response or not response['hits']['hits']:
    print("No results found.")
    exit(1)

scroll_id = response['_scroll_id']
res_c = 0

for hit in response['hits']['hits']:
    res_c += 1
    print("Result:", hit['_source'])

while True:
    response = es.scroll(body={"scroll_id": scroll_id, "scroll": "2m"})
    print("Fetching next batch ...")
    if 'hits' not in response or not response['hits']['hits']:
        break

    for hit in response['hits']['hits']:
        res_c += 1
        print("Result:", hit['_source'])

es.clear_scroll(body={"scroll_id": scroll_id})

print("Total results:", res_c)
</code>

====Delete Index====

<code python>
import csv
import glob
import os
from elasticsearch import Elasticsearch
import urllib3

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

es = Elasticsearch(
    ['https://localhost:9200'],          # example host
    basic_auth=("elastic", "dein_sicheres_passwort"),
    verify_certs=False
)

index_name = input('Name of the index to delete: ')

# Check whether the index exists and delete it
if es.indices.exists(index=index_name):
    es.indices.delete(index=index_name)
    print(f"Index '{index_name}' deleted.")
else:
    print(f"Index '{index_name}' does not exist.")
</code>

====Scroll Query====