Files
autogen/util/seed-memory/Program.cs
Kosta Petan d6b917faf4 Add service to enable github issues workflow (#1)
* big bang GitHub workflows

* add missing settings in local.settings.json

* config refactor

* fix devlead plan response

* swap cosmos to table storage for metadata storage

* unify config via options

* azd-ify WIP

* add qdrant bicep WIP

* working azd provision setup

* consolidate SK version in projects

* replace localhost :)

* add fqdn to options

* httpclient fixes

* add managed identity to the function and assign contrib role

* qdrant endpoint setting

* add container instances cleanup code + wait on termination to upload to GitHub

* formatting fixes

* add tables in bicep

* local getting started WIP

* add azure setup instructions

* add the load-waf bits

* docs WIP

---------

Co-authored-by: Kosta Petan <Kosta.Petan@microsoft.com>
2023-08-28 20:57:56 +02:00

64 lines
2.8 KiB
C#

using UglyToad.PdfPig;
using UglyToad.PdfPig.DocumentLayoutAnalysis.TextExtractor;
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.Text;
using Microsoft.Extensions.Logging;
using System.Text;
using Microsoft.SemanticKernel.Connectors.Memory.Qdrant;
using Microsoft.SemanticKernel.Connectors.AI.OpenAI.TextEmbedding;
using Microsoft.SemanticKernel.Memory;
using System.Reflection;
/// <summary>
/// Console utility that reads a PDF page by page and stores each page's text
/// in the "waf-pages" semantic memory collection of a Semantic Kernel instance.
/// </summary>
class Program
{
    // Name of the PDF to import; resolved relative to the executing assembly's directory.
    private const string WafFileName = "azure-well-architected.pdf";

    // Memory collection that receives one record per imported PDF page.
    private const string MemoryCollection = "waf-pages";

    // Second argument handed to the Qdrant client constructor.
    // NOTE(review): presumably the vector dimension of the configured embedding model - confirm.
    private const int VectorSize = 1536;

    // Maximum number of leading characters of the page text used in a record's description.
    private const int DescriptionLength = 100;

    /// <summary>
    /// Entry point: loads the kernel settings, builds a kernel backed by a Qdrant
    /// memory store and Azure text embeddings, then imports <see cref="WafFileName"/>.
    /// </summary>
    /// <param name="args">Command-line arguments (not used).</param>
    static async Task Main(string[] args)
    {
        var kernelSettings = KernelSettings.LoadSettings();
        var kernelConfig = new KernelConfig();

        using ILoggerFactory loggerFactory = LoggerFactory.Create(builder =>
        {
            builder
                .SetMinimumLevel(kernelSettings.LogLevel ?? LogLevel.Warning)
                .AddConsole()
                .AddDebug();
        });

        var memoryStore = new QdrantMemoryStore(new QdrantVectorDbClient(kernelSettings.QdrantEndpoint, VectorSize));
        var embeddingGeneration = new AzureTextEmbeddingGeneration(kernelSettings.EmbeddingDeploymentOrModelId, kernelSettings.Endpoint, kernelSettings.ApiKey);
        var semanticTextMemory = new SemanticTextMemory(memoryStore, embeddingGeneration);

        var kernel = new KernelBuilder()
            .WithLogger(loggerFactory.CreateLogger<IKernel>())
            .WithAzureChatCompletionService(kernelSettings.DeploymentOrModelId, kernelSettings.Endpoint, kernelSettings.ApiKey, true, kernelSettings.ServiceId, true)
            .WithMemory(semanticTextMemory)
            .WithConfiguration(kernelConfig).Build();

        await ImportDocumentAsync(kernel, WafFileName);
    }

    /// <summary>
    /// Extracts the text of every page of the given PDF and saves each page as a
    /// separate record (with a fresh GUID id) in the "waf-pages" memory collection.
    /// A failure on one page is reported to the console and does not stop the import.
    /// </summary>
    /// <param name="kernel">Kernel whose <c>Memory</c> receives the page records.</param>
    /// <param name="filename">PDF file name, relative to the executing assembly's directory.</param>
    /// <returns>A task that completes when all pages have been processed.</returns>
    public static async Task ImportDocumentAsync(IKernel kernel, string filename)
    {
        // Assembly.Location can be empty (e.g. single-file publish), which makes
        // GetDirectoryName return null; fall back to the application base directory.
        var baseDirectory = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location);
        if (string.IsNullOrEmpty(baseDirectory))
        {
            baseDirectory = AppContext.BaseDirectory;
        }

        var filePath = Path.Combine(baseDirectory, filename);

        // Own the stream explicitly so the file handle is released even though the
        // PDF document wrapper is disposed separately.
        using var pdfStream = File.OpenRead(filePath);
        using var pdfDocument = PdfDocument.Open(pdfStream);

        foreach (var page in pdfDocument.GetPages())
        {
            try
            {
                var text = ContentOrderTextExtractor.GetText(page);
                if (string.IsNullOrWhiteSpace(text))
                {
                    // Nothing to embed for a page without extractable text.
                    continue;
                }

                // Build the description from real characters; interpolating the result of
                // Take(100) would print the iterator's type name instead of the text.
                var descr = text.Length > DescriptionLength
                    ? text.Substring(0, DescriptionLength)
                    : text;

                await kernel.Memory.SaveInformationAsync(
                    collection: MemoryCollection,
                    text: text,
                    id: $"{Guid.NewGuid()}",
                    description: $"Document: {descr}");
            }
            catch (Exception ex)
            {
                // Best-effort import: report the failing page and continue with the next one.
                Console.WriteLine($"Failed to import page {page.Number}: {ex.Message}");
            }
        }
    }
}