OpenSearch 是什么

OpenSearch 是一个软件系列，由搜索引擎(也称为 OpenSearch)和 OpenSearch Dashboards(该搜索引擎的数据可视化仪表板)组成。该软件始于 2021 年，是 Elasticsearch 和 Kibana 的一个分支，由 Amazon Web Services 领导开发。

在 Docker 下使用 OpenSearch

Install

docker pull opensearchproject/opensearch:1.2.4
docker run -p 9200:9200 -p 9600:9600 -e "discovery.type=single-node" opensearchproject/opensearch:1.2.4

Test

In a new terminal session, run:

curl -XGET --insecure -u 'admin:admin' 'https://localhost:9200'

Create your first index.

curl -XPUT --insecure -u 'admin:admin' 'https://localhost:9200/my-first-index'

Add some data to your newly created index.

curl -XPUT --insecure -u 'admin:admin' 'https://localhost:9200/my-first-index/_doc/1' -H 'Content-Type: application/json' -d '{"Description": "To be or not to be, that is the question."}'

Retrieve the data to see that it was added properly.

curl -XGET --insecure -u 'admin:admin' 'https://localhost:9200/my-first-index/_doc/1'

After verifying that the data is correct, delete the document.

curl -XDELETE --insecure -u 'admin:admin' 'https://localhost:9200/my-first-index/_doc/1'

Finally, delete the index.

curl -XDELETE --insecure -u 'admin:admin' 'https://localhost:9200/my-first-index/'

Use OpenSearch via Python

add_data

from opensearchpy import OpenSearch
from sentence_transformers import SentenceTransformer, util

# Script: embed one question with a sentence-transformers model and store it,
# together with its answer, as a single document in an OpenSearch index.

host = 'localhost'
port = 9200
auth = ('admin', 'admin') # For testing only. Don't store credentials in code.
# ca_certs_path = '/full/path/to/root-ca.pem' # Provide a CA bundle if you use intermediate CAs with your root CA.

# Optional client certificates if you don't want to use HTTP basic authentication.
# client_cert_path = '/full/path/to/client.pem'
# client_key_path = '/full/path/to/client-key.pem'

# Create the client with SSL/TLS enabled, but with certificate and hostname
# verification disabled (verify_certs / ssl_assert_hostname are both False).
client = OpenSearch(
    hosts = [{'host': host, 'port': port}],
    http_compress = True, # enables gzip compression for request bodies
    http_auth = auth,
    # client_cert = client_cert_path,
    # client_key = client_key_path,
    use_ssl = True,
    verify_certs = False,
    ssl_assert_hostname = False,
    ssl_show_warn = False,
    # ca_certs = ca_certs_path
)

# Index that receives the document.
index_name = 'qa_index_384'

# Text of the question; it is both embedded and stored verbatim.
question = "朋友圈信息流"

model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
embedding = model.encode(question, convert_to_tensor=True)
# Convert the embedding to a plain list so it can be sent in the request body.
Q_vec = embedding.tolist()

# Document layout: question text, question embedding, answer.
document = {
  'Q_text': question,
  'Q_vec': Q_vec,
  'Ans': 'ABC',
}
# Print the embedding length; presumably this should match the vector
# dimension of the target index -- verify against the index mapping.
print(len(Q_vec))

# Named doc_id (not id) so the builtin id() is not shadowed.
doc_id = '20'

response = client.index(
    index = index_name,
    body = document,
    id = doc_id,
    refresh = True
)

print('\nAdding document:')
print(response)

create_index

from opensearchpy import OpenSearch

# Script: create an index whose "Q_vec" field is a knn_vector, then print the
# response returned by the create call.

host, port = 'localhost', 9200
auth = ('admin', 'admin')  # For testing only. Don't store credentials in code.

# Optional extras that are not enabled here: a CA bundle passed as `ca_certs`,
# or client certificates passed as `client_cert` / `client_key` in place of
# HTTP basic authentication.

# SSL/TLS is enabled, but certificate and hostname verification are turned off.
connection_options = dict(
    hosts=[{'host': host, 'port': port}],
    http_compress=True,  # enables gzip compression for request bodies
    http_auth=auth,
    use_ssl=True,
    verify_certs=False,
    ssl_assert_hostname=False,
    ssl_show_warn=False,
)
client = OpenSearch(**connection_options)

index_name = 'qa_index_768'

# "method" section of the vector field mapping.
hnsw_method = {
    "name": "hnsw",
    "space_type": "l2",
    "engine": "nmslib",
    "parameters": {
        "ef_construction": 128,
        "m": 24,
    },
}

# Mapping for the field that stores the 768-dimensional vectors.
vector_field = {
    "type": "knn_vector",
    "dimension": 768,
    "method": hnsw_method,
}

# Full request body: index settings followed by the field mappings.
index_body = {
    'settings': {
        'index': {
            "knn": True,
            "knn.algo_param.ef_search": 100,
        },
    },
    "mappings": {
        "properties": {
            "Q_vec": vector_field,
        },
    },
}

response = client.indices.create(index_name, body=index_body)
print('\nCreating index:')
print(response)

delete_data

from opensearchpy import OpenSearch

# Script: delete one document (id 2) from an index and print the response.

# Index that holds the document to remove.
index_name = 'qa_index_4'

host, port = 'localhost', 9200
auth = ('admin', 'admin')  # For testing only. Don't store credentials in code.

# Not enabled here: a CA bundle (`ca_certs`) and client certificates
# (`client_cert` / `client_key`).

# SSL/TLS is enabled, but certificate and hostname verification are turned off.
connection_options = dict(
    hosts=[{'host': host, 'port': port}],
    http_compress=True,  # enables gzip compression for request bodies
    http_auth=auth,
    use_ssl=True,
    verify_certs=False,
    ssl_assert_hostname=False,
    ssl_show_warn=False,
)
client = OpenSearch(**connection_options)

response = client.delete(index=index_name, id=2)

print('\nDeleting document:')
print(response)


# response = client.indices.delete(
#     index = index_name
# )

# print('\nDeleting index:')
# print(response)

delete_index

from opensearchpy import OpenSearch

# Script: delete a whole index and print the response.

host = 'localhost'
port = 9200
auth = ('admin', 'admin') # For testing only. Don't store credentials in code.
# ca_certs_path = '/full/path/to/root-ca.pem' # Provide a CA bundle if you use intermediate CAs with your root CA.

# Optional client certificates if you don't want to use HTTP basic authentication.
# client_cert_path = '/full/path/to/client.pem'
# client_key_path = '/full/path/to/client-key.pem'

# Create the client with SSL/TLS enabled, but with certificate and hostname
# verification disabled (verify_certs / ssl_assert_hostname are both False).
client = OpenSearch(
    hosts = [{'host': host, 'port': port}],
    http_compress = True, # enables gzip compression for request bodies
    http_auth = auth,
    # client_cert = client_cert_path,
    # client_key = client_key_path,
    use_ssl = True,
    verify_certs = False,
    ssl_assert_hostname = False,
    ssl_show_warn = False,
    # ca_certs = ca_certs_path
)

# Name of the index to delete. Left empty on purpose so nothing is removed by
# accident; fill it in before running.
index_name = ""

# Stop with a clear message instead of sending a delete request without a
# target index.
if not index_name:
    raise SystemExit('Set index_name to the index you want to delete.')

response = client.indices.delete(
    index = index_name
)

print('\nDeleting index:')
print(response)

search_data

from open_search import client, index_name
from encode import encode


def search_qa(query, size=5, k=2):
    """Search the QA index by vector similarity and phrase match.

    The query text is passed to ``encode`` and the result is used in a k-NN
    clause on ``Q_vec``. That clause is combined, inside a ``bool``/``should``
    query, with ``match_phrase`` clauses on ``Q_text`` and ``Ans_text``.

    Args:
        query: Query text; it is encoded for the k-NN clause and used
            verbatim as the phrase in both ``match_phrase`` clauses.
        size: Value sent as the ``size`` of the search request.
        k: Value sent as ``k`` in the k-NN clause.

    Returns:
        The response returned by ``client.search``.
    """
    query_vector = encode(query)

    # Build the request under its own name so the ``query`` argument keeps
    # referring to the original text.
    search_body = {
        '_source': ['Q_text', 'Ans_url', 'Q_vec', "Ans_text"],
        'size': size,
        'query': {
            "bool": {
                "should": [
                    {
                        "knn": {
                            "Q_vec": {
                                "vector": query_vector,
                                "k": k
                            }
                        }
                    },
                    {
                        "match_phrase": {
                            "Q_text": query
                        }
                    },
                    {
                        "match_phrase": {
                            "Ans_text": query
                        }
                    }
                ]
            },
        }
    }

    response = client.search(
        body=search_body,
        index=index_name
    )
    # Hand the hits back to the caller rather than dropping them.
    return response